Posted to commits@hbase.apache.org by st...@apache.org on 2009/02/25 06:59:27 UTC
svn commit: r747672 [3/4] - in /hadoop/hbase/trunk: conf/
src/java/org/apache/hadoop/hbase/io/
src/java/org/apache/hadoop/hbase/io/hfile/
src/java/org/apache/hadoop/hbase/regionserver/
src/test/org/apache/hadoop/hbase/ src/test/org/apache/hadoop/hbase/...
Added: hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java?rev=747672&view=auto
==============================================================================
--- hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java (added)
+++ hadoop/hbase/trunk/src/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java Wed Feb 25 05:59:26 2009
@@ -0,0 +1,336 @@
+/**
+ * Copyright 2008 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.regionserver;
+
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+import java.util.SortedMap;
+import java.util.TreeMap;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.locks.ReentrantReadWriteLock;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HStoreKey;
+import org.apache.hadoop.hbase.filter.RowFilterInterface;
+import org.apache.hadoop.hbase.io.Cell;
+import org.apache.hadoop.hbase.util.Bytes;
+
+/**
+ * Scanner that scans both the memcache and the HStore files
+ */
+class StoreScanner implements InternalScanner, ChangedReadersObserver {
+ static final Log LOG = LogFactory.getLog(StoreScanner.class);
+
+ private InternalScanner[] scanners;
+ private TreeMap<byte [], Cell>[] resultSets;
+ private HStoreKey[] keys;
+ private boolean wildcardMatch = false;
+ private boolean multipleMatchers = false;
+ private RowFilterInterface dataFilter;
+ private Store store;
+ private final long timestamp;
+ private final byte [][] targetCols;
+
+ // Indices for memcache scanner and hstorefile scanner.
+ private static final int MEMS_INDEX = 0;
+ private static final int HSFS_INDEX = MEMS_INDEX + 1;
+
+ // Used around transition from no storefile to the first.
+ private final ReentrantReadWriteLock lock = new ReentrantReadWriteLock();
+
+ // Used to indicate that the scanner has closed (see HBASE-1107)
+ private final AtomicBoolean closing = new AtomicBoolean(false);
+
+ /** Create a Scanner with a handle on the memcache and HStore files. */
+ @SuppressWarnings("unchecked")
+ StoreScanner(Store store, byte [][] targetCols, byte [] firstRow,
+ long timestamp, RowFilterInterface filter)
+ throws IOException {
+ this.store = store;
+ this.dataFilter = filter;
+ if (null != dataFilter) {
+ dataFilter.reset();
+ }
+ this.scanners = new InternalScanner[2];
+ this.resultSets = new TreeMap[scanners.length];
+ this.keys = new HStoreKey[scanners.length];
+ // Save these args in case we need them later handling change in readers
+ // See updateReaders below.
+ this.timestamp = timestamp;
+ this.targetCols = targetCols;
+ try {
+ scanners[MEMS_INDEX] =
+ store.memcache.getScanner(timestamp, targetCols, firstRow);
+ scanners[HSFS_INDEX] =
+ new StoreFileScanner(store, timestamp, targetCols, firstRow);
+ for (int i = MEMS_INDEX; i < scanners.length; i++) {
+ checkScannerFlags(i);
+ }
+ } catch (IOException e) {
+ doClose();
+ throw e;
+ }
+
+ // Advance to the first key in each scanner.
+ // All results will match the required column-set and scanTime.
+ for (int i = MEMS_INDEX; i < scanners.length; i++) {
+ setupScanner(i);
+ }
+
+ this.store.addChangedReaderObserver(this);
+ }
+
+ /*
+ * @param i Index.
+ */
+ private void checkScannerFlags(final int i) {
+ if (this.scanners[i].isWildcardScanner()) {
+ this.wildcardMatch = true;
+ }
+ if (this.scanners[i].isMultipleMatchScanner()) {
+ this.multipleMatchers = true;
+ }
+ }
+
+ /*
+ * Do scanner setup.
+ * @param i
+ * @throws IOException
+ */
+ private void setupScanner(final int i) throws IOException {
+ this.keys[i] = new HStoreKey();
+ this.resultSets[i] = new TreeMap<byte [], Cell>(Bytes.BYTES_COMPARATOR);
+ if (this.scanners[i] != null && !this.scanners[i].next(this.keys[i],
+ this.resultSets[i])) {
+ closeScanner(i);
+ }
+ }
+
+ /** @return true if the scanner is a wild card scanner */
+ public boolean isWildcardScanner() {
+ return this.wildcardMatch;
+ }
+
+ /** @return true if the scanner is a multiple match scanner */
+ public boolean isMultipleMatchScanner() {
+ return this.multipleMatchers;
+ }
+
+ public boolean next(HStoreKey key, SortedMap<byte [], Cell> results)
+ throws IOException {
+ this.lock.readLock().lock();
+ try {
+ // Filtered flag is set by filters. If a cell has been 'filtered out'
+ // -- i.e. it is not to be returned to the caller -- the flag is 'true'.
+ boolean filtered = true;
+ boolean moreToFollow = true;
+ while (filtered && moreToFollow) {
+ // Find the lowest-possible key.
+ byte [] chosenRow = null;
+ long chosenTimestamp = -1;
+ for (int i = 0; i < this.keys.length; i++) {
+ if (scanners[i] != null &&
+ (chosenRow == null ||
+ (HStoreKey.compareTwoRowKeys(this.keys[i].getRow(), chosenRow) < 0) ||
+ ((HStoreKey.compareTwoRowKeys(this.keys[i].getRow(), chosenRow) == 0) &&
+ (keys[i].getTimestamp() > chosenTimestamp)))) {
+ chosenRow = keys[i].getRow();
+ chosenTimestamp = keys[i].getTimestamp();
+ }
+ }
+
+ // Filter whole row by row key?
+ filtered = dataFilter != null? dataFilter.filterRowKey(chosenRow) : false;
+
+ // Store the key and results for each sub-scanner. Merge them as
+ // appropriate.
+ if (chosenTimestamp >= 0 && !filtered) {
+ // Here we are setting the passed in key with current row+timestamp
+ key.setRow(chosenRow);
+ key.setVersion(chosenTimestamp);
+ key.setColumn(HConstants.EMPTY_BYTE_ARRAY);
+ // Keep list of deleted cell keys within this row. We need this
+ // because as we go through scanners, the delete record may be in an
+ // early scanner and then the same record with a non-delete, non-null
+ // value in a later. Without history of what we've seen, we'll return
+ // deleted values. This List should not ever grow too large since we
+ // are only keeping rows and columns that match those set on the
+ // scanner and which have delete values. If memory usage becomes a
+ // problem, could redo as bloom filter.
+ Set<HStoreKey> deletes = new HashSet<HStoreKey>();
+ for (int i = 0; i < scanners.length && !filtered; i++) {
+ while ((scanners[i] != null
+ && !filtered
+ && moreToFollow)
+ && (HStoreKey.compareTwoRowKeys(this.keys[i].getRow(), chosenRow) == 0)) {
+ // If we are doing a wild card match or there are multiple
+ // matchers per column, we need to scan all the older versions of
+ // this row to pick up the rest of the family members
+ if (!wildcardMatch
+ && !multipleMatchers
+ && (keys[i].getTimestamp() != chosenTimestamp)) {
+ break;
+ }
+
+ // NOTE: We used to do results.putAll(resultSets[i]);
+ // but this had the effect of overwriting newer
+ // values with older ones. So now we only insert
+ // a result if the map does not contain the key.
+ HStoreKey hsk = new HStoreKey(key.getRow(),
+ HConstants.EMPTY_BYTE_ARRAY,
+ key.getTimestamp());
+ for (Map.Entry<byte [], Cell> e : resultSets[i].entrySet()) {
+ hsk.setColumn(e.getKey());
+ if (HLogEdit.isDeleted(e.getValue().getValue())) {
+ // Only first key encountered is added; deletes is a Set.
+ deletes.add(new HStoreKey(hsk));
+ } else if (!deletes.contains(hsk) &&
+ !filtered &&
+ moreToFollow &&
+ !results.containsKey(e.getKey())) {
+ if (dataFilter != null) {
+ // Filter whole row by column data?
+ filtered = dataFilter.filterColumn(chosenRow, e.getKey(),
+ e.getValue().getValue());
+ if (filtered) {
+ results.clear();
+ break;
+ }
+ }
+ results.put(e.getKey(), e.getValue());
+ }
+ }
+ resultSets[i].clear();
+ if (!scanners[i].next(keys[i], resultSets[i])) {
+ closeScanner(i);
+ }
+ }
+ }
+ }
+ for (int i = 0; i < scanners.length; i++) {
+ // If the current scanner is non-null AND has a lower-or-equal
+ // row label, then its timestamp is bad. We need to advance it.
+ while ((scanners[i] != null) &&
+ (HStoreKey.compareTwoRowKeys(this.keys[i].getRow(), chosenRow) <= 0)) {
+ resultSets[i].clear();
+ if (!scanners[i].next(keys[i], resultSets[i])) {
+ closeScanner(i);
+ }
+ }
+ }
+
+ moreToFollow = chosenTimestamp >= 0;
+ if (dataFilter != null) {
+ if (dataFilter.filterAllRemaining()) {
+ moreToFollow = false;
+ }
+ }
+
+ if (results.size() <= 0 && !filtered) {
+ // There were no results found for this row. Mark it as
+ // 'filtered'-out, otherwise we will not move on to the next row.
+ filtered = true;
+ }
+ }
+
+ // If we got no results, then there is no more to follow.
+ if (results == null || results.size() <= 0) {
+ moreToFollow = false;
+ }
+
+ // Make sure scanners closed if no more results
+ if (!moreToFollow) {
+ for (int i = 0; i < scanners.length; i++) {
+ if (null != scanners[i]) {
+ closeScanner(i);
+ }
+ }
+ }
+
+ return moreToFollow;
+ } finally {
+ this.lock.readLock().unlock();
+ }
+ }
+
+ /** Shut down a single scanner */
+ void closeScanner(int i) {
+ try {
+ try {
+ scanners[i].close();
+ } catch (IOException e) {
+ LOG.warn(store.storeName + " failed closing scanner " + i, e);
+ }
+ } finally {
+ scanners[i] = null;
+ keys[i] = null;
+ resultSets[i] = null;
+ }
+ }
+
+ public void close() {
+ this.closing.set(true);
+ this.store.deleteChangedReaderObserver(this);
+ doClose();
+ }
+
+ private void doClose() {
+ for (int i = MEMS_INDEX; i < scanners.length; i++) {
+ if (scanners[i] != null) {
+ closeScanner(i);
+ }
+ }
+ }
+
+ // Implementation of ChangedReadersObserver
+
+ public void updateReaders() throws IOException {
+ if (this.closing.get()) {
+ return;
+ }
+ this.lock.writeLock().lock();
+ try {
+ Map<Long, StoreFile> map = this.store.getStorefiles();
+ if (this.scanners[HSFS_INDEX] == null && map != null && map.size() > 0) {
+ // Presume that we went from no readers to at least one -- need to put
+ // a HStoreScanner in place.
+ try {
+ // I think it's safe getting the key from mem at this stage -- it shouldn't
+ // have been flushed yet
+ this.scanners[HSFS_INDEX] = new StoreFileScanner(this.store,
+ this.timestamp, this.targetCols, this.keys[MEMS_INDEX].getRow());
+ checkScannerFlags(HSFS_INDEX);
+ setupScanner(HSFS_INDEX);
+ LOG.debug("Added a StoreFileScanner to outstanding HStoreScanner");
+ } catch (IOException e) {
+ doClose();
+ throw e;
+ }
+ }
+ } finally {
+ this.lock.writeLock().unlock();
+ }
+ }
+}
\ No newline at end of file
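
The heart of StoreScanner.next() above is a merge across the open sub-scanners: each pass picks the lowest row key (ties broken in favour of the newest timestamp), collects that row's cells while honouring deletes, then advances every scanner still sitting at or before the chosen row. A minimal standalone sketch of that pick-the-lowest-row loop, using plain String queues instead of the real InternalScanner API (the class and variable names below are invented for illustration):

    import java.util.ArrayDeque;
    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.Deque;
    import java.util.List;

    /** Standalone sketch of the "pick the lowest row key" merge used by StoreScanner.next(). */
    public class MergeSketch {
      public static void main(String[] args) {
        // Two already-sorted sources standing in for the memcache and store file scanners.
        List<Deque<String>> scanners = new ArrayList<Deque<String>>();
        scanners.add(new ArrayDeque<String>(Arrays.asList("row1", "row3", "row4")));
        scanners.add(new ArrayDeque<String>(Arrays.asList("row2", "row3", "row5")));
        while (true) {
          // Find the lowest-possible key across all non-exhausted scanners.
          String chosenRow = null;
          for (Deque<String> s : scanners) {
            if (!s.isEmpty() && (chosenRow == null || s.peek().compareTo(chosenRow) < 0)) {
              chosenRow = s.peek();
            }
          }
          if (chosenRow == null) {
            break; // every scanner is exhausted
          }
          System.out.println("emit " + chosenRow);
          // Advance every scanner still sitting at (or before) the chosen row,
          // mirroring the second loop in next().
          for (Deque<String> s : scanners) {
            while (!s.isEmpty() && s.peek().compareTo(chosenRow) <= 0) {
              s.poll();
            }
          }
        }
      }
    }

Run as-is it emits row1 through row5 once each; the delete tracking and column filtering shown above layer on top of this merged order.
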
Added: hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/HFilePerformanceEvaluation.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/HFilePerformanceEvaluation.java?rev=747672&view=auto
==============================================================================
--- hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/HFilePerformanceEvaluation.java (added)
+++ hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/HFilePerformanceEvaluation.java Wed Feb 25 05:59:26 2009
@@ -0,0 +1,365 @@
+/**
+ * Copyright 2007 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.Random;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.commons.math.random.RandomData;
+import org.apache.commons.math.random.RandomDataImpl;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.io.hfile.HFile;
+import org.apache.hadoop.hbase.io.hfile.HFileScanner;
+import org.apache.hadoop.hbase.util.Bytes;
+
+/**
+ * <p>
+ * This class runs performance benchmarks for {@link HFile}.
+ * </p>
+ */
+public class HFilePerformanceEvaluation {
+
+ private static final int ROW_LENGTH = 10;
+ private static final int ROW_COUNT = 1000000;
+ private static final int RFILE_BLOCKSIZE = 8 * 1024;
+
+ static final Log LOG =
+ LogFactory.getLog(HFilePerformanceEvaluation.class.getName());
+
+ static byte [] format(final int i) {
+ String v = Integer.toString(i);
+ return Bytes.toBytes("0000000000".substring(v.length()) + v);
+ }
+
+ static ImmutableBytesWritable format(final int i, ImmutableBytesWritable w) {
+ w.set(format(i));
+ return w;
+ }
+
+ private void runBenchmarks() throws Exception {
+ final Configuration conf = new Configuration();
+ final FileSystem fs = FileSystem.get(conf);
+ final Path mf = fs.makeQualified(new Path("performanceevaluation.mapfile"));
+ if (fs.exists(mf)) {
+ fs.delete(mf, true);
+ }
+
+ runBenchmark(new SequentialWriteBenchmark(conf, fs, mf, ROW_COUNT),
+ ROW_COUNT);
+ PerformanceEvaluationCommons.concurrentReads(new Runnable() {
+ public void run() {
+ try {
+ runBenchmark(new UniformRandomSmallScan(conf, fs, mf, ROW_COUNT),
+ ROW_COUNT);
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+ }
+ });
+ PerformanceEvaluationCommons.concurrentReads(new Runnable() {
+ public void run() {
+ try {
+ runBenchmark(new UniformRandomReadBenchmark(conf, fs, mf, ROW_COUNT),
+ ROW_COUNT);
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+ }
+ });
+ PerformanceEvaluationCommons.concurrentReads(new Runnable() {
+ public void run() {
+ try {
+ runBenchmark(new GaussianRandomReadBenchmark(conf, fs, mf, ROW_COUNT),
+ ROW_COUNT);
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+ }
+ });
+ PerformanceEvaluationCommons.concurrentReads(new Runnable() {
+ public void run() {
+ try {
+ runBenchmark(new SequentialReadBenchmark(conf, fs, mf, ROW_COUNT),
+ ROW_COUNT);
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+ }
+ });
+
+ }
+
+ private void runBenchmark(RowOrientedBenchmark benchmark, int rowCount)
+ throws Exception {
+ LOG.info("Running " + benchmark.getClass().getSimpleName() + " for " +
+ rowCount + " rows.");
+ long elapsedTime = benchmark.run();
+ LOG.info("Running " + benchmark.getClass().getSimpleName() + " for " +
+ rowCount + " rows took " + elapsedTime + "ms.");
+ }
+
+ static abstract class RowOrientedBenchmark {
+
+ protected final Configuration conf;
+ protected final FileSystem fs;
+ protected final Path mf;
+ protected final int totalRows;
+
+ public RowOrientedBenchmark(Configuration conf, FileSystem fs, Path mf,
+ int totalRows) {
+ this.conf = conf;
+ this.fs = fs;
+ this.mf = mf;
+ this.totalRows = totalRows;
+ }
+
+ void setUp() throws Exception {
+ // do nothing
+ }
+
+ abstract void doRow(int i) throws Exception;
+
+ protected int getReportingPeriod() {
+ return this.totalRows / 10;
+ }
+
+ void tearDown() throws Exception {
+ // do nothing
+ }
+
+ /**
+ * Run benchmark
+ * @return elapsed time.
+ * @throws Exception
+ */
+ long run() throws Exception {
+ long elapsedTime;
+ setUp();
+ long startTime = System.currentTimeMillis();
+ try {
+ for (int i = 0; i < totalRows; i++) {
+ if (i > 0 && i % getReportingPeriod() == 0) {
+ LOG.info("Processed " + i + " rows.");
+ }
+ doRow(i);
+ }
+ elapsedTime = System.currentTimeMillis() - startTime;
+ } finally {
+ tearDown();
+ }
+ return elapsedTime;
+ }
+
+ }
+
+ static class SequentialWriteBenchmark extends RowOrientedBenchmark {
+ protected HFile.Writer writer;
+ private Random random = new Random();
+ private byte[] bytes = new byte[ROW_LENGTH];
+
+ public SequentialWriteBenchmark(Configuration conf, FileSystem fs, Path mf,
+ int totalRows) {
+ super(conf, fs, mf, totalRows);
+ }
+
+ @Override
+ void setUp() throws Exception {
+ writer = new HFile.Writer(this.fs, this.mf, RFILE_BLOCKSIZE, null, null);
+ }
+
+ @Override
+ void doRow(int i) throws Exception {
+ writer.append(format(i), generateValue());
+ }
+
+ private byte[] generateValue() {
+ random.nextBytes(bytes);
+ return bytes;
+ }
+
+ @Override
+ protected int getReportingPeriod() {
+ return this.totalRows; // don't report progress
+ }
+
+ @Override
+ void tearDown() throws Exception {
+ writer.close();
+ }
+
+ }
+
+ static abstract class ReadBenchmark extends RowOrientedBenchmark {
+ ImmutableBytesWritable key = new ImmutableBytesWritable();
+ ImmutableBytesWritable value = new ImmutableBytesWritable();
+
+ protected HFile.Reader reader;
+
+ public ReadBenchmark(Configuration conf, FileSystem fs, Path mf,
+ int totalRows) {
+ super(conf, fs, mf, totalRows);
+ }
+
+ @Override
+ void setUp() throws Exception {
+ reader = new HFile.Reader(this.fs, this.mf, null);
+ this.reader.loadFileInfo();
+ }
+
+ @Override
+ void tearDown() throws Exception {
+ reader.close();
+ }
+
+ }
+
+ static class SequentialReadBenchmark extends ReadBenchmark {
+ private HFileScanner scanner;
+
+ public SequentialReadBenchmark(Configuration conf, FileSystem fs,
+ Path mf, int totalRows)
+ throws IOException {
+ super(conf, fs, mf, totalRows);
+ }
+
+ @Override
+ void setUp() throws Exception {
+ super.setUp();
+ this.scanner = this.reader.getScanner();
+ this.scanner.seekTo();
+ }
+
+ @Override
+ void doRow(int i) throws Exception {
+ if (this.scanner.next()) {
+ ByteBuffer k = this.scanner.getKey();
+ PerformanceEvaluationCommons.assertKey(format(i + 1), k);
+ ByteBuffer v = scanner.getValue();
+ PerformanceEvaluationCommons.assertValueSize(v.limit(), ROW_LENGTH);
+ }
+ }
+
+ @Override
+ protected int getReportingPeriod() {
+ return this.totalRows; // don't report progress
+ }
+
+ }
+
+ static class UniformRandomReadBenchmark extends ReadBenchmark {
+
+ private Random random = new Random();
+
+ public UniformRandomReadBenchmark(Configuration conf, FileSystem fs,
+ Path mf, int totalRows) {
+ super(conf, fs, mf, totalRows);
+ }
+
+ @Override
+ void doRow(@SuppressWarnings("unused") int i) throws Exception {
+ HFileScanner scanner = this.reader.getScanner();
+ byte [] b = getRandomRow();
+ scanner.seekTo(b);
+ ByteBuffer k = scanner.getKey();
+ PerformanceEvaluationCommons.assertKey(b, k);
+ ByteBuffer v = scanner.getValue();
+ PerformanceEvaluationCommons.assertValueSize(v.limit(), ROW_LENGTH);
+ }
+
+ private byte [] getRandomRow() {
+ return format(random.nextInt(totalRows));
+ }
+ }
+
+ static class UniformRandomSmallScan extends ReadBenchmark {
+ private Random random = new Random();
+
+ public UniformRandomSmallScan(Configuration conf, FileSystem fs,
+ Path mf, int totalRows) {
+ super(conf, fs, mf, totalRows/10);
+ }
+
+ @Override
+ void doRow(@SuppressWarnings("unused") int i) throws Exception {
+ HFileScanner scanner = this.reader.getScanner();
+ byte [] b = getRandomRow();
+ if (scanner.seekTo(b) != 0) {
+ System.out.println("Nonexistent row: " + new String(b));
+ return;
+ }
+ ByteBuffer k = scanner.getKey();
+ PerformanceEvaluationCommons.assertKey(b, k);
+ // System.out.println("Found row: " + new String(b));
+ for (int ii = 0; ii < 30; ii++) {
+ if (!scanner.next()) {
+ System.out.println("NOTHING FOLLOWS");
+ }
+ ByteBuffer v = scanner.getValue();
+ PerformanceEvaluationCommons.assertValueSize(v.limit(), ROW_LENGTH);
+ }
+ }
+
+ private byte [] getRandomRow() {
+ return format(random.nextInt(totalRows));
+ }
+ }
+
+ static class GaussianRandomReadBenchmark extends ReadBenchmark {
+
+ private RandomData randomData = new RandomDataImpl();
+
+ public GaussianRandomReadBenchmark(Configuration conf, FileSystem fs,
+ Path mf, int totalRows) {
+ super(conf, fs, mf, totalRows);
+ }
+
+ @Override
+ void doRow(@SuppressWarnings("unused") int i) throws Exception {
+ HFileScanner scanner = this.reader.getScanner();
+ scanner.seekTo(getGaussianRandomRowBytes());
+ for (int ii = 0; ii < 30; ii++) {
+ if (!scanner.next()) {
+ System.out.println("NOTHING FOLLOWS");
+ }
+ scanner.getKey();
+ scanner.getValue();
+ }
+ }
+
+ private byte [] getGaussianRandomRowBytes() {
+ int r = (int) randomData.nextGaussian(totalRows / 2, totalRows / 10);
+ return format(r);
+ }
+ }
+
+ /**
+ * @param args
+ * @throws IOException
+ */
+ public static void main(String[] args) throws Exception {
+ new HFilePerformanceEvaluation().runBenchmarks();
+ }
+}
\ No newline at end of file
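
All of the benchmarks above share RowOrientedBenchmark's template: setUp() opens the writer or reader, doRow(i) is invoked totalRows times with periodic progress logging, tearDown() closes resources, and run() returns the elapsed milliseconds that runBenchmark() logs. A stripped-down, self-contained restatement of that template (no HFile types; the no-op workload and class name are invented for illustration):

    /** Standalone sketch of the setUp / doRow xN / tearDown timing template used above. */
    public abstract class BenchmarkSketch {
      protected final int totalRows;

      BenchmarkSketch(int totalRows) {
        this.totalRows = totalRows;
      }

      void setUp() throws Exception {
        // open writers/readers here
      }

      abstract void doRow(int i) throws Exception; // one unit of work

      void tearDown() throws Exception {
        // close resources here
      }

      long run() throws Exception {
        setUp();
        long startTime = System.currentTimeMillis();
        try {
          for (int i = 0; i < totalRows; i++) {
            doRow(i);
          }
          return System.currentTimeMillis() - startTime;
        } finally {
          tearDown();
        }
      }

      public static void main(String[] args) throws Exception {
        BenchmarkSketch noop = new BenchmarkSketch(1000000) {
          @Override
          void doRow(int i) {
            // no-op; a real benchmark would append to or seek an HFile here
          }
        };
        System.out.println("1000000 no-op rows took " + noop.run() + "ms");
      }
    }
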
Added: hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/PerformanceEvaluationCommons.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/PerformanceEvaluationCommons.java?rev=747672&view=auto
==============================================================================
--- hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/PerformanceEvaluationCommons.java (added)
+++ hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/PerformanceEvaluationCommons.java Wed Feb 25 05:59:26 2009
@@ -0,0 +1,78 @@
+/**
+ * Copyright 2009 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase;
+
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+
+/**
+ * Code shared by PE tests.
+ */
+public class PerformanceEvaluationCommons {
+ static final Log LOG =
+ LogFactory.getLog(PerformanceEvaluationCommons.class.getName());
+
+ public static void assertValueSize(final int expectedSize, final int got) {
+ if (got != expectedSize) {
+ throw new AssertionError("Expected " + expectedSize + " but got " + got);
+ }
+ }
+
+ public static void assertKey(final byte [] expected, final ByteBuffer got) {
+ byte [] b = new byte[got.limit()];
+ got.get(b, 0, got.limit());
+ assertKey(expected, b);
+ }
+
+ public static void assertKey(final byte [] expected, final byte [] got) {
+ if (!org.apache.hadoop.hbase.util.Bytes.equals(expected, got)) {
+ throw new AssertionError("Expected " +
+ org.apache.hadoop.hbase.util.Bytes.toString(expected) +
+ " but got " + org.apache.hadoop.hbase.util.Bytes.toString(got));
+ }
+ }
+
+ public static void concurrentReads(final Runnable r) {
+ final int count = 1;
+ long now = System.currentTimeMillis();
+ List<Thread> threads = new ArrayList<Thread>(count);
+ for (int i = 0; i < count; i++) {
+ Thread t = new Thread(r);
+ t.setName("" + i);
+ threads.add(t);
+ }
+ for (Thread t: threads) {
+ t.start();
+ }
+ for (Thread t: threads) {
+ try {
+ t.join();
+ } catch (InterruptedException e) {
+ e.printStackTrace();
+ }
+ }
+ LOG.info("Test took " + (System.currentTimeMillis() - now));
+ }
+}
\ No newline at end of file
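
concurrentReads() above currently runs its Runnable on a single thread (count is hard-coded to 1), joins it, and logs the wall-clock time; HFilePerformanceEvaluation wraps each read benchmark in exactly this way. A small driver sketch, assuming this commit's class is on the classpath and substituting a dummy workload for a real benchmark:

    import org.apache.hadoop.hbase.PerformanceEvaluationCommons;

    public class ConcurrentReadsSketch {
      public static void main(String[] args) {
        PerformanceEvaluationCommons.concurrentReads(new Runnable() {
          public void run() {
            // Stand-in for e.g. running a UniformRandomReadBenchmark.
            long sum = 0;
            for (int i = 0; i < 10000000; i++) {
              sum += i;
            }
            System.out.println("workload done, sum=" + sum);
          }
        });
      }
    }
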
Added: hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/TestHStoreKey.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/TestHStoreKey.java?rev=747672&view=auto
==============================================================================
--- hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/TestHStoreKey.java (added)
+++ hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/TestHStoreKey.java Wed Feb 25 05:59:26 2009
@@ -0,0 +1,128 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+import junit.framework.TestCase;
+
+import org.apache.hadoop.hbase.HStoreKey.StoreKeyByteComparator;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.Writables;
+
+/**
+ * Tests for the HStoreKey Plain and Meta RawComparators.
+ */
+public class TestHStoreKey extends TestCase {
+ protected void setUp() throws Exception {
+ super.setUp();
+ }
+
+ protected void tearDown() throws Exception {
+ super.tearDown();
+ }
+
+ public void testByteBuffer() throws Exception {
+ final long ts = 123;
+ final byte [] row = Bytes.toBytes("row");
+ final byte [] column = Bytes.toBytes("column");
+ HStoreKey hsk = new HStoreKey(row, column, ts);
+ ByteBuffer bb = ByteBuffer.wrap(hsk.getBytes());
+ assertTrue(Bytes.equals(row, HStoreKey.getRow(bb)));
+ assertTrue(Bytes.equals(column, HStoreKey.getColumn(bb)));
+ assertEquals(ts, HStoreKey.getTimestamp(bb));
+ }
+
+ /**
+ * Test that the byte comparator works the same as the object comparator.
+ */
+ public void testRawComparator() throws IOException {
+ long timestamp = System.currentTimeMillis();
+ byte [] a = Bytes.toBytes("a");
+ HStoreKey past = new HStoreKey(a, a, timestamp - 10);
+ byte [] pastBytes = Writables.getBytes(past);
+ HStoreKey now = new HStoreKey(a, a, timestamp);
+ byte [] nowBytes = Writables.getBytes(now);
+ HStoreKey future = new HStoreKey(a, a, timestamp + 10);
+ byte [] futureBytes = Writables.getBytes(future);
+ StoreKeyByteComparator comparator = new HStoreKey.StoreKeyByteComparator();
+ assertTrue(past.compareTo(now) > 0);
+ assertTrue(comparator.compare(pastBytes, nowBytes) > 0);
+ assertTrue(now.compareTo(now) == 0);
+ assertTrue(comparator.compare(nowBytes, nowBytes) == 0);
+ assertTrue(future.compareTo(now) < 0);
+ assertTrue(comparator.compare(futureBytes, nowBytes) < 0);
+ // Check that empty column comes before one with a column
+ HStoreKey nocolumn = new HStoreKey(a, timestamp);
+ byte [] nocolumnBytes = Writables.getBytes(nocolumn);
+ HStoreKey withcolumn = new HStoreKey(a, a, timestamp);
+ byte [] withcolumnBytes = Writables.getBytes(withcolumn);
+ assertTrue(nocolumn.compareTo(withcolumn) < 0);
+ assertTrue(comparator.compare(nocolumnBytes, withcolumnBytes) < 0);
+ // Check that an empty column with LATEST timestamp comes before one with
+ // a column and an old timestamp.
+ nocolumn = new HStoreKey(a, HConstants.LATEST_TIMESTAMP);
+ nocolumnBytes = Writables.getBytes(nocolumn);
+ withcolumn = new HStoreKey(a, a, timestamp);
+ withcolumnBytes = Writables.getBytes(withcolumn);
+ assertTrue(nocolumn.compareTo(withcolumn) < 0);
+ assertTrue(comparator.compare(nocolumnBytes, withcolumnBytes) < 0);
+ }
+
+// /**
+// * Tests cases where rows keys have characters below the ','.
+// * See HBASE-832
+// * @throws IOException
+// */
+// public void testHStoreKeyBorderCases() throws IOException {
+// HRegionInfo info = new HRegionInfo(new HTableDescriptor("testtable"),
+// HConstants.EMPTY_BYTE_ARRAY, HConstants.EMPTY_BYTE_ARRAY);
+//
+// HStoreKey rowA = new HStoreKey("testtable,www.hbase.org/,1234",
+// "", Long.MAX_VALUE, info);
+// byte [] rowABytes = Writables.getBytes(rowA);
+// HStoreKey rowB = new HStoreKey("testtable,www.hbase.org/%20,99999",
+// "", Long.MAX_VALUE, info);
+// byte [] rowBBytes = Writables.getBytes(rowB);
+// assertTrue(rowA.compareTo(rowB) > 0);
+// HStoreKey.Comparator comparator = new HStoreKey.PlainStoreKeyComparator();
+// assertTrue(comparator.compare(rowABytes, rowBBytes) > 0);
+//
+// rowA = new HStoreKey("testtable,www.hbase.org/,1234",
+// "", Long.MAX_VALUE, HRegionInfo.FIRST_META_REGIONINFO);
+// rowB = new HStoreKey("testtable,www.hbase.org/%20,99999",
+// "", Long.MAX_VALUE, HRegionInfo.FIRST_META_REGIONINFO);
+// assertTrue(rowA.compareTo(rowB) < 0);
+// assertTrue(comparator.compare(rowABytes, rowBBytes) < 0);
+//
+// rowA = new HStoreKey("testtable,,1234",
+// "", Long.MAX_VALUE, HRegionInfo.FIRST_META_REGIONINFO);
+// rowB = new HStoreKey("testtable,$www.hbase.org/,99999",
+// "", Long.MAX_VALUE, HRegionInfo.FIRST_META_REGIONINFO);
+// assertTrue(rowA.compareTo(rowB) < 0);
+// assertTrue(comparator.compare(rowABytes, rowBBytes) < 0);
+//
+// rowA = new HStoreKey(".META.,testtable,www.hbase.org/,1234,4321",
+// "", Long.MAX_VALUE, HRegionInfo.ROOT_REGIONINFO);
+// rowB = new HStoreKey(".META.,testtable,www.hbase.org/%20,99999,99999",
+// "", Long.MAX_VALUE, HRegionInfo.ROOT_REGIONINFO);
+// assertTrue(rowA.compareTo(rowB) > 0);
+// assertTrue(comparator.compare(rowABytes, rowBBytes) > 0);
+// }
+}
\ No newline at end of file
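
The first assertions in testRawComparator() read backwards at first glance: past.compareTo(now) > 0 because HStoreKey orders identical row/column pairs by descending timestamp, so the newest cell is encountered first during a scan. A plain-Java sketch of that newest-first ordering on bare timestamps (no HStoreKey involved):

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.Collections;
    import java.util.Comparator;
    import java.util.List;

    /** Sketch: with row and column equal, later timestamps compare as "smaller" and sort first. */
    public class NewestFirstSketch {
      public static void main(String[] args) {
        List<Long> timestamps = new ArrayList<Long>(Arrays.asList(100L, 110L, 90L));
        Collections.sort(timestamps, new Comparator<Long>() {
          public int compare(Long a, Long b) {
            // Reverse of natural order: the larger (newer) timestamp wins.
            return b.compareTo(a);
          }
        });
        System.out.println(timestamps); // prints [110, 100, 90]
      }
    }
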
Added: hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/io/hfile/KVGenerator.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/io/hfile/KVGenerator.java?rev=747672&view=auto
==============================================================================
--- hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/io/hfile/KVGenerator.java (added)
+++ hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/io/hfile/KVGenerator.java Wed Feb 25 05:59:26 2009
@@ -0,0 +1,111 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.hadoop.hbase.io.hfile;
+
+import java.util.Random;
+
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.WritableComparator;
+
+/**
+ * Generate random <key, value> pairs.
+ * <p>
+ * Copied from
+ * <a href="https://issues.apache.org/jira/browse/HADOOP-3315">hadoop-3315 tfile</a>.
+ * Remove after tfile is committed and use the tfile version of this class
+ * instead.</p>
+ */
+class KVGenerator {
+ private final Random random;
+ private final byte[][] dict;
+ private final boolean sorted;
+ private final RandomDistribution.DiscreteRNG keyLenRNG, valLenRNG;
+ private BytesWritable lastKey;
+ private static final int MIN_KEY_LEN = 4;
+ private final byte prefix[] = new byte[MIN_KEY_LEN];
+
+ public KVGenerator(Random random, boolean sorted,
+ RandomDistribution.DiscreteRNG keyLenRNG,
+ RandomDistribution.DiscreteRNG valLenRNG,
+ RandomDistribution.DiscreteRNG wordLenRNG, int dictSize) {
+ this.random = random;
+ dict = new byte[dictSize][];
+ this.sorted = sorted;
+ this.keyLenRNG = keyLenRNG;
+ this.valLenRNG = valLenRNG;
+ for (int i = 0; i < dictSize; ++i) {
+ int wordLen = wordLenRNG.nextInt();
+ dict[i] = new byte[wordLen];
+ random.nextBytes(dict[i]);
+ }
+ lastKey = new BytesWritable();
+ fillKey(lastKey);
+ }
+
+ private void fillKey(BytesWritable o) {
+ int len = keyLenRNG.nextInt();
+ if (len < MIN_KEY_LEN) len = MIN_KEY_LEN;
+ o.setSize(len);
+ int n = MIN_KEY_LEN;
+ while (n < len) {
+ byte[] word = dict[random.nextInt(dict.length)];
+ int l = Math.min(word.length, len - n);
+ System.arraycopy(word, 0, o.get(), n, l);
+ n += l;
+ }
+ if (sorted
+ && WritableComparator.compareBytes(lastKey.get(), MIN_KEY_LEN, lastKey
+ .getSize()
+ - MIN_KEY_LEN, o.get(), MIN_KEY_LEN, o.getSize() - MIN_KEY_LEN) > 0) {
+ incrementPrefix();
+ }
+
+ System.arraycopy(prefix, 0, o.get(), 0, MIN_KEY_LEN);
+ lastKey.set(o);
+ }
+
+ private void fillValue(BytesWritable o) {
+ int len = valLenRNG.nextInt();
+ o.setSize(len);
+ int n = 0;
+ while (n < len) {
+ byte[] word = dict[random.nextInt(dict.length)];
+ int l = Math.min(word.length, len - n);
+ System.arraycopy(word, 0, o.get(), n, l);
+ n += l;
+ }
+ }
+
+ private void incrementPrefix() {
+ for (int i = MIN_KEY_LEN - 1; i >= 0; --i) {
+ ++prefix[i];
+ if (prefix[i] != 0) return;
+ }
+
+ throw new RuntimeException("Prefix overflown");
+ }
+
+ public void next(BytesWritable key, BytesWritable value, boolean dupKey) {
+ if (dupKey) {
+ key.set(lastKey);
+ }
+ else {
+ fillKey(key);
+ }
+ fillValue(value);
+ }
+}
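
KVGenerator is wired up from a Random plus three DiscreteRNG instances (key length, value length, and dictionary word length) and a dictionary size; with sorted=true it keeps keys non-decreasing by bumping the 4-byte prefix whenever a new key would otherwise sort before the previous one. A usage sketch, placed in the same org.apache.hadoop.hbase.io.hfile package because KVGenerator is package-private in this commit:

    package org.apache.hadoop.hbase.io.hfile;

    import java.util.Random;

    import org.apache.hadoop.io.BytesWritable;

    /** Usage sketch for the package-private KVGenerator test helper. */
    public class KVGeneratorSketch {
      public static void main(String[] args) {
        Random rng = new Random(0);
        KVGenerator gen = new KVGenerator(rng, true /* sorted keys */,
            new RandomDistribution.Flat(rng, 10, 50),     // key lengths
            new RandomDistribution.Flat(rng, 100, 1000),  // value lengths
            new RandomDistribution.Flat(rng, 5, 20),      // dictionary word lengths
            100 /* dictionary size */);
        BytesWritable key = new BytesWritable();
        BytesWritable value = new BytesWritable();
        for (int i = 0; i < 3; i++) {
          gen.next(key, value, false /* do not duplicate the previous key */);
          System.out.println("key " + key.getSize() + " bytes, value " + value.getSize() + " bytes");
        }
      }
    }
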
Added: hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/io/hfile/KeySampler.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/io/hfile/KeySampler.java?rev=747672&view=auto
==============================================================================
--- hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/io/hfile/KeySampler.java (added)
+++ hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/io/hfile/KeySampler.java Wed Feb 25 05:59:26 2009
@@ -0,0 +1,64 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.hadoop.hbase.io.hfile;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.Random;
+
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.hbase.io.hfile.RandomDistribution.DiscreteRNG;
+
+/*
+* <p>
+* Copied from
+* <a href="https://issues.apache.org/jira/browse/HADOOP-3315">hadoop-3315 tfile</a>.
+* Remove after tfile is committed and use the tfile version of this class
+* instead.</p>
+*/
+class KeySampler {
+ Random random;
+ int min, max;
+ DiscreteRNG keyLenRNG;
+ private static final int MIN_KEY_LEN = 4;
+
+ public KeySampler(Random random, byte [] first, byte [] last,
+ DiscreteRNG keyLenRNG) throws IOException {
+ this.random = random;
+ min = keyPrefixToInt(first);
+ max = keyPrefixToInt(last);
+ this.keyLenRNG = keyLenRNG;
+ }
+
+ private int keyPrefixToInt(byte [] key) throws IOException {
+ byte[] b = key;
+ int o = 0;
+ return (b[o] & 0xff) << 24 | (b[o + 1] & 0xff) << 16
+ | (b[o + 2] & 0xff) << 8 | (b[o + 3] & 0xff);
+ }
+
+ public void next(BytesWritable key) {
+ key.setSize(Math.max(MIN_KEY_LEN, keyLenRNG.nextInt()));
+ random.nextBytes(key.get());
+ int n = random.nextInt(max - min) + min;
+ byte[] b = key.get();
+ b[0] = (byte) (n >> 24);
+ b[1] = (byte) (n >> 16);
+ b[2] = (byte) (n >> 8);
+ b[3] = (byte) n;
+ }
+}
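
KeySampler picks keys whose leading 4 bytes, read as a big-endian int, fall uniformly between the prefixes of first and last; keyPrefixToInt() does the packing and next() writes the chosen prefix back into the key. A tiny plain-Java sketch of that 4-byte pack/unpack round trip:

    import java.util.Arrays;

    /** Sketch of the big-endian 4-byte prefix packing used by KeySampler. */
    public class PrefixPackSketch {
      public static void main(String[] args) {
        byte[] b = new byte[] {0x01, 0x02, 0x03, 0x04};
        // Same packing as keyPrefixToInt().
        int n = (b[0] & 0xff) << 24 | (b[1] & 0xff) << 16 | (b[2] & 0xff) << 8 | (b[3] & 0xff);
        System.out.println(n); // 16909060 == 0x01020304
        // Same unpacking as next() writing the prefix back into the key.
        byte[] out = new byte[] {(byte) (n >> 24), (byte) (n >> 16), (byte) (n >> 8), (byte) n};
        System.out.println(Arrays.equals(b, out)); // true
      }
    }
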
Added: hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/io/hfile/NanoTimer.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/io/hfile/NanoTimer.java?rev=747672&view=auto
==============================================================================
--- hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/io/hfile/NanoTimer.java (added)
+++ hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/io/hfile/NanoTimer.java Wed Feb 25 05:59:26 2009
@@ -0,0 +1,198 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.hadoop.hbase.io.hfile;
+
+/**
+ * A nano-second timer.
+ * <p>
+ * Copied from
+ * <a href="https://issues.apache.org/jira/browse/HADOOP-3315">hadoop-3315 tfile</a>.
+ * Remove after tfile is committed and use the tfile version of this class
+ * instead.</p>
+ */
+public class NanoTimer {
+ private long last = -1;
+ private boolean started = false;
+ private long cumulate = 0;
+
+ /**
+ * Constructor
+ *
+ * @param start
+ * Start the timer upon construction.
+ */
+ public NanoTimer(boolean start) {
+ if (start) this.start();
+ }
+
+ /**
+ * Start the timer.
+ *
+ * Note: No effect if timer is already started.
+ */
+ public void start() {
+ if (!this.started) {
+ this.last = System.nanoTime();
+ this.started = true;
+ }
+ }
+
+ /**
+ * Stop the timer.
+ *
+ * Note: No effect if timer is already stopped.
+ */
+ public void stop() {
+ if (this.started) {
+ this.started = false;
+ this.cumulate += System.nanoTime() - this.last;
+ }
+ }
+
+ /**
+ * Read the timer.
+ *
+ * @return the elapsed time in nanoseconds. Note: if the timer has never
+ * been started, -1 is returned.
+ */
+ public long read() {
+ if (!readable()) return -1;
+
+ return this.cumulate;
+ }
+
+ /**
+ * Reset the timer.
+ */
+ public void reset() {
+ this.last = -1;
+ this.started = false;
+ this.cumulate = 0;
+ }
+
+ /**
+ * Checking whether the timer is started
+ *
+ * @return true if timer is started.
+ */
+ public boolean isStarted() {
+ return this.started;
+ }
+
+ /**
+ * Format the elapsed time to a human understandable string.
+ *
+ * Note: If timer is never started, "ERR" will be returned.
+ */
+ public String toString() {
+ if (!readable()) {
+ return "ERR";
+ }
+
+ return NanoTimer.nanoTimeToString(this.cumulate);
+ }
+
+ /**
+ * A utility method to format a time duration in nano seconds into a human
+ * understandable string.
+ *
+ * @param t
+ * Time duration in nano seconds.
+ * @return String representation.
+ */
+ public static String nanoTimeToString(long t) {
+ if (t < 0) return "ERR";
+
+ if (t == 0) return "0";
+
+ if (t < 1000) {
+ return t + "ns";
+ }
+
+ double us = (double) t / 1000;
+ if (us < 1000) {
+ return String.format("%.2fus", us);
+ }
+
+ double ms = us / 1000;
+ if (ms < 1000) {
+ return String.format("%.2fms", ms);
+ }
+
+ double ss = ms / 1000;
+ if (ss < 1000) {
+ return String.format("%.2fs", ss);
+ }
+
+ long mm = (long) ss / 60;
+ ss -= mm * 60;
+ long hh = mm / 60;
+ mm -= hh * 60;
+ long dd = hh / 24;
+ hh -= dd * 24;
+
+ if (dd > 0) {
+ return String.format("%dd%dh", dd, hh);
+ }
+
+ if (hh > 0) {
+ return String.format("%dh%dm", hh, mm);
+ }
+
+ if (mm > 0) {
+ return String.format("%dm%.1fs", mm, ss);
+ }
+
+ return String.format("%.2fs", ss);
+
+ /**
+ * StringBuilder sb = new StringBuilder(); String sep = "";
+ *
+ * if (dd > 0) { String unit = (dd > 1) ? "days" : "day";
+ * sb.append(String.format("%s%d%s", sep, dd, unit)); sep = " "; }
+ *
+ * if (hh > 0) { String unit = (hh > 1) ? "hrs" : "hr";
+ * sb.append(String.format("%s%d%s", sep, hh, unit)); sep = " "; }
+ *
+ * if (mm > 0) { String unit = (mm > 1) ? "mins" : "min";
+ * sb.append(String.format("%s%d%s", sep, mm, unit)); sep = " "; }
+ *
+ * if (ss > 0) { String unit = (ss > 1) ? "secs" : "sec";
+ * sb.append(String.format("%s%.3f%s", sep, ss, unit)); sep = " "; }
+ *
+ * return sb.toString();
+ */
+ }
+
+ private boolean readable() {
+ return this.last != -1;
+ }
+
+ /**
+ * Simple tester.
+ *
+ * @param args
+ */
+ public static void main(String[] args) {
+ long i = 7;
+
+ for (int x = 0; x < 20; ++x, i *= 7) {
+ System.out.println(NanoTimer.nanoTimeToString(i));
+ }
+ }
+}
+
Added: hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/io/hfile/RandomDistribution.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/io/hfile/RandomDistribution.java?rev=747672&view=auto
==============================================================================
--- hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/io/hfile/RandomDistribution.java (added)
+++ hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/io/hfile/RandomDistribution.java Wed Feb 25 05:59:26 2009
@@ -0,0 +1,271 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.hadoop.hbase.io.hfile;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Random;
+
+/**
+ * A class that generates random numbers that follow some distribution.
+ * <p>
+ * Copied from
+ * <a href="https://issues.apache.org/jira/browse/HADOOP-3315">hadoop-3315 tfile</a>.
+ * Remove after tfile is committed and use the tfile version of this class
+ * instead.</p>
+ */
+public class RandomDistribution {
+ /**
+ * Interface for discrete (integer) random distributions.
+ */
+ public static interface DiscreteRNG {
+ /**
+ * Get the next random number
+ *
+ * @return the next random number.
+ */
+ public int nextInt();
+ }
+
+ /**
+ * P(i)=1/(max-min)
+ */
+ public static final class Flat implements DiscreteRNG {
+ private final Random random;
+ private final int min;
+ private final int max;
+
+ /**
+ * Generate random integers from min (inclusive) to max (exclusive)
+ * following a uniform distribution.
+ *
+ * @param random
+ * The basic random number generator.
+ * @param min
+ * Minimum integer
+ * @param max
+ * maximum integer (exclusive).
+ *
+ */
+ public Flat(Random random, int min, int max) {
+ if (min >= max) {
+ throw new IllegalArgumentException("Invalid range");
+ }
+ this.random = random;
+ this.min = min;
+ this.max = max;
+ }
+
+ /**
+ * @see DiscreteRNG#nextInt()
+ */
+ @Override
+ public int nextInt() {
+ return random.nextInt(max - min) + min;
+ }
+ }
+
+ /**
+ * Zipf distribution. The ratio of the probabilities of integer i and j is
+ * defined as follows:
+ *
+ * P(i)/P(j)=((j-min+1)/(i-min+1))^sigma.
+ */
+ public static final class Zipf implements DiscreteRNG {
+ private static final double DEFAULT_EPSILON = 0.001;
+ private final Random random;
+ private final ArrayList<Integer> k;
+ private final ArrayList<Double> v;
+
+ /**
+ * Constructor
+ *
+ * @param r
+ * The random number generator.
+ * @param min
+ * minimum integer (inclusive)
+ * @param max
+ * maximum integer (exclusive)
+ * @param sigma
+ * parameter sigma. (sigma > 1.0)
+ */
+ public Zipf(Random r, int min, int max, double sigma) {
+ this(r, min, max, sigma, DEFAULT_EPSILON);
+ }
+
+ /**
+ * Constructor.
+ *
+ * @param r
+ * The random number generator.
+ * @param min
+ * minimum integer (inclusive)
+ * @param max
+ * maximum integer (exclusive)
+ * @param sigma
+ * parameter sigma. (sigma > 1.0)
+ * @param epsilon
+ * Allowable error percentage (0 < epsilon < 1.0).
+ */
+ public Zipf(Random r, int min, int max, double sigma, double epsilon) {
+ if ((max <= min) || (sigma <= 1) || (epsilon <= 0)
+ || (epsilon >= 0.5)) {
+ throw new IllegalArgumentException("Invalid arguments");
+ }
+ random = r;
+ k = new ArrayList<Integer>();
+ v = new ArrayList<Double>();
+
+ double sum = 0;
+ int last = -1;
+ for (int i = min; i < max; ++i) {
+ sum += Math.exp(-sigma * Math.log(i - min + 1));
+ if ((last == -1) || i * (1 - epsilon) > last) {
+ k.add(i);
+ v.add(sum);
+ last = i;
+ }
+ }
+
+ if (last != max - 1) {
+ k.add(max - 1);
+ v.add(sum);
+ }
+
+ v.set(v.size() - 1, 1.0);
+
+ for (int i = v.size() - 2; i >= 0; --i) {
+ v.set(i, v.get(i) / sum);
+ }
+ }
+
+ /**
+ * @see DiscreteRNG#nextInt()
+ */
+ @Override
+ public int nextInt() {
+ double d = random.nextDouble();
+ int idx = Collections.binarySearch(v, d);
+
+ if (idx > 0) {
+ ++idx;
+ }
+ else {
+ idx = -(idx + 1);
+ }
+
+ if (idx >= v.size()) {
+ idx = v.size() - 1;
+ }
+
+ if (idx == 0) {
+ return k.get(0);
+ }
+
+ int ceiling = k.get(idx);
+ int lower = k.get(idx - 1);
+
+ return ceiling - random.nextInt(ceiling - lower);
+ }
+ }
+
+ /**
+ * Binomial distribution.
+ *
+ * P(k)=select(n, k)*p^k*(1-p)^(n-k) (k = 0, 1, ..., n)
+ *
+ * P(k)=select(max-min-1, k-min)*p^(k-min)*(1-p)^(max-k-1) (k = min, ..., max-1)
+ */
+ public static final class Binomial implements DiscreteRNG {
+ private final Random random;
+ private final int min;
+ private final int n;
+ private final double[] v;
+
+ private static double select(int n, int k) {
+ double ret = 1.0;
+ for (int i = k + 1; i <= n; ++i) {
+ ret *= (double) i / (i - k);
+ }
+ return ret;
+ }
+
+ private static double power(double p, int k) {
+ return Math.exp(k * Math.log(p));
+ }
+
+ /**
+ * Generate random integers from min (inclusive) to max (exclusive)
+ * following Binomial distribution.
+ *
+ * @param random
+ * The basic random number generator.
+ * @param min
+ * Minimum integer
+ * @param max
+ * maximum integer (exclusive).
+ * @param p
+ * parameter.
+ *
+ */
+ public Binomial(Random random, int min, int max, double p) {
+ if (min >= max) {
+ throw new IllegalArgumentException("Invalid range");
+ }
+ this.random = random;
+ this.min = min;
+ this.n = max - min - 1;
+ if (n > 0) {
+ v = new double[n + 1];
+ double sum = 0.0;
+ for (int i = 0; i <= n; ++i) {
+ sum += select(n, i) * power(p, i) * power(1 - p, n - i);
+ v[i] = sum;
+ }
+ for (int i = 0; i <= n; ++i) {
+ v[i] /= sum;
+ }
+ }
+ else {
+ v = null;
+ }
+ }
+
+ /**
+ * @see DiscreteRNG#nextInt()
+ */
+ @Override
+ public int nextInt() {
+ if (v == null) {
+ return min;
+ }
+ double d = random.nextDouble();
+ int idx = Arrays.binarySearch(v, d);
+ if (idx > 0) {
+ ++idx;
+ } else {
+ idx = -(idx + 1);
+ }
+
+ if (idx >= v.length) {
+ idx = v.length - 1;
+ }
+ return idx + min;
+ }
+ }
+}
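
Each DiscreteRNG above yields ints in [min, max) with a different shape: Flat is uniform, Zipf (sigma > 1) is heavily skewed toward the small end of the range, and Binomial is bell-shaped. A quick comparison sketch that histograms draws from Flat and Zipf, assuming this commit's public RandomDistribution class:

    import java.util.Random;

    import org.apache.hadoop.hbase.io.hfile.RandomDistribution;

    public class DistributionSketch {
      public static void main(String[] args) {
        Random rng = new Random(42);
        RandomDistribution.DiscreteRNG flat = new RandomDistribution.Flat(rng, 0, 10);
        RandomDistribution.DiscreteRNG zipf = new RandomDistribution.Zipf(rng, 0, 10, 1.5);
        int[] flatCounts = new int[10];
        int[] zipfCounts = new int[10];
        for (int i = 0; i < 100000; i++) {
          flatCounts[flat.nextInt()]++;
          zipfCounts[zipf.nextInt()]++;
        }
        // Expect the flat counts to be roughly even and the zipf counts to pile up near 0.
        System.out.println("flat: bucket 0 = " + flatCounts[0] + ", bucket 9 = " + flatCounts[9]);
        System.out.println("zipf: bucket 0 = " + zipfCounts[0] + ", bucket 9 = " + zipfCounts[9]);
      }
    }
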
Added: hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/io/hfile/RandomSeek.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/io/hfile/RandomSeek.java?rev=747672&view=auto
==============================================================================
--- hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/io/hfile/RandomSeek.java (added)
+++ hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/io/hfile/RandomSeek.java Wed Feb 25 05:59:26 2009
@@ -0,0 +1,124 @@
+/**
+ * Copyright 2009 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.io.hfile;
+
+import java.io.BufferedReader;
+import java.io.FileReader;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.LocalFileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RawLocalFileSystem;
+import org.apache.hadoop.hbase.io.hfile.HFile.Reader;
+import org.apache.hadoop.hbase.util.Bytes;
+
+/**
+ * Random seek test.
+ */
+public class RandomSeek {
+ private static List<String> slurp(String fname) throws IOException {
+ BufferedReader istream = new BufferedReader(new FileReader(fname));
+ String str;
+ List<String> l = new ArrayList<String>();
+ while ( (str=istream.readLine()) != null) {
+ String [] parts = str.split(",");
+ l.add(parts[0] + ":" + parts[1] + ":" + parts[2]);
+ }
+ return l;
+ }
+ private static String randKey(List<String> keys) {
+ Random r = new Random();
+ //return keys.get(r.nextInt(keys.size()));
+ return "2" + Integer.toString(7+r.nextInt(2)) + Integer.toString(r.nextInt(100));
+ //return new String(r.nextInt(100));
+ }
+
+ public static void main(String [] argv) throws IOException {
+ Configuration conf = new Configuration();
+ conf.setInt("io.file.buffer.size", 64*1024);
+ RawLocalFileSystem rlfs = new RawLocalFileSystem();
+ rlfs.setConf(conf);
+ LocalFileSystem lfs = new LocalFileSystem(rlfs);
+
+ Path path = new Path("/Users/ryan/rfile.big.txt");
+ long start = System.currentTimeMillis();
+ SimpleBlockCache cache = new SimpleBlockCache();
+ //LruBlockCache cache = new LruBlockCache();
+ Reader reader = new HFile.Reader(lfs, path, cache);
+ reader.loadFileInfo();
+ System.out.println(reader.trailer);
+ long end = System.currentTimeMillis();
+
+ System.out.println("Index read time: " + (end - start));
+
+ List<String> keys = slurp("/Users/ryan/xaa.50k");
+
+ HFileScanner scanner = reader.getScanner();
+ int count;
+ long totalBytes = 0;
+ int notFound = 0;
+
+ start = System.nanoTime();
+ for(count = 0; count < 500000; ++count) {
+ String key = randKey(keys);
+ byte [] bkey = Bytes.toBytes(key);
+ int res = scanner.seekTo(bkey);
+ if (res == 0) {
+ ByteBuffer k = scanner.getKey();
+ ByteBuffer v = scanner.getValue();
+ totalBytes += k.limit();
+ totalBytes += v.limit();
+ } else {
+ ++ notFound;
+ }
+ if (res == -1) {
+ scanner.seekTo();
+ }
+ // Scan for another 1000 rows.
+ for (int i = 0; i < 1000; ++i) {
+ if (!scanner.next())
+ break;
+ ByteBuffer k = scanner.getKey();
+ ByteBuffer v = scanner.getValue();
+ totalBytes += k.limit();
+ totalBytes += v.limit();
+ }
+
+ if ( count % 1000 == 0 ) {
+ end = System.nanoTime();
+
+ System.out.println("Cache block count: " + cache.size() + " dumped: "+ cache.dumps);
+ //System.out.println("Cache size: " + cache.heapSize());
+ double msTime = ((end - start) / 1000000.0);
+ System.out.println("Seeked: "+ count + " in " + msTime + " (ms) "
+ + (1000.0 / msTime ) + " seeks/ms "
+ + (msTime / 1000.0) + " ms/seek");
+
+ start = System.nanoTime();
+ }
+ }
+ System.out.println("Total bytes: " + totalBytes + " not found: " + notFound);
+ }
+}
Added: hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/io/hfile/TestHFile.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/io/hfile/TestHFile.java?rev=747672&view=auto
==============================================================================
--- hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/io/hfile/TestHFile.java (added)
+++ hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/io/hfile/TestHFile.java Wed Feb 25 05:59:26 2009
@@ -0,0 +1,248 @@
+/**
+ * Copyright 2009 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.io.hfile;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+
+import junit.framework.TestCase;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocalFileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RawLocalFileSystem;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.io.hfile.HFile.Reader;
+import org.apache.hadoop.hbase.io.hfile.HFile.Writer;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.RawComparator;
+
+/**
+ * test hfile features.
+ * <p>
+ * Copied from
+ * <a href="https://issues.apache.org/jira/browse/HADOOP-3315">hadoop-3315 tfile</a>.
+ * Remove after tfile is committed and use the tfile version of this class
+ * instead.</p>
+ */
+public class TestHFile extends TestCase {
+ static final Log LOG = LogFactory.getLog(TestHFile.class);
+
+ private static String ROOT_DIR =
+ System.getProperty("test.build.data", "/tmp/TestHFile");
+ private FileSystem fs;
+ private Configuration conf;
+ private final int minBlockSize = 512;
+ private static String localFormatter = "%010d";
+
+ @Override
+ public void setUp() {
+ conf = new HBaseConfiguration();
+ RawLocalFileSystem rawLFS = new RawLocalFileSystem();
+ rawLFS.setConf(conf);
+ fs = new LocalFileSystem(rawLFS);
+ }
+
+ // write some records into the tfile
+ // write them twice
+ private int writeSomeRecords(Writer writer, int start, int n)
+ throws IOException {
+ String value = "value";
+ for (int i = start; i < (start + n); i++) {
+ String key = String.format(localFormatter, Integer.valueOf(i));
+ writer.append(Bytes.toBytes(key), Bytes.toBytes(value + key));
+ }
+ return (start + n);
+ }
+
+ private void readAllRecords(HFileScanner scanner) throws IOException {
+ readAndCheckbytes(scanner, 0, 100);
+ }
+
+ // read the records and check
+ private int readAndCheckbytes(HFileScanner scanner, int start, int n)
+ throws IOException {
+ String value = "value";
+ int i = start;
+ for (; i < (start + n); i++) {
+ ByteBuffer key = scanner.getKey();
+ ByteBuffer val = scanner.getValue();
+ String keyStr = String.format(localFormatter, Integer.valueOf(i));
+ String valStr = value + keyStr;
+ byte [] keyBytes = Bytes.toBytes(key);
+ assertTrue("bytes for keys do not match " + keyStr + " " +
+ Bytes.toString(Bytes.toBytes(key)),
+ Arrays.equals(Bytes.toBytes(keyStr), keyBytes));
+ byte [] valBytes = Bytes.toBytes(val);
+ assertTrue("bytes for vals do not match " + valStr + " " +
+ Bytes.toString(valBytes),
+ Arrays.equals(Bytes.toBytes(valStr), valBytes));
+ if (!scanner.next()) {
+ break;
+ }
+ }
+ assertEquals(i, start + n - 1);
+ return (start + n);
+ }
+
+ private byte[] getSomeKey(int rowId) {
+ return String.format(localFormatter, Integer.valueOf(rowId)).getBytes();
+ }
+
+ private void writeRecords(Writer writer) throws IOException {
+ writeSomeRecords(writer, 0, 100);
+ writer.close();
+ }
+
+ private FSDataOutputStream createFSOutput(Path name) throws IOException {
+ if (fs.exists(name)) fs.delete(name, true);
+ FSDataOutputStream fout = fs.create(name);
+ return fout;
+ }
+
+ /**
+ * Write and read back a basic hfile using the given codec.
+ */
+ void basicWithSomeCodec(String codec) throws IOException {
+ Path ncTFile = new Path(ROOT_DIR, "basic.hfile");
+ FSDataOutputStream fout = createFSOutput(ncTFile);
+ Writer writer = new Writer(fout, minBlockSize, codec, null);
+ LOG.info(writer);
+ writeRecords(writer);
+ fout.close();
+ FSDataInputStream fin = fs.open(ncTFile);
+ Reader reader = new Reader(fs.open(ncTFile),
+ fs.getFileStatus(ncTFile).getLen(), null);
+ // Load up the index.
+ reader.loadFileInfo();
+ LOG.info(reader);
+ HFileScanner scanner = reader.getScanner();
+ // Align scanner at start of the file.
+ scanner.seekTo();
+ readAllRecords(scanner);
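+ // seekTo returns 0 when the requested key is found exactly.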
+ scanner.seekTo(getSomeKey(50));
+ assertTrue("location lookup failed", scanner.seekTo(getSomeKey(50)) == 0);
+ // read the key and see if it matches
+ ByteBuffer readKey = scanner.getKey();
+ assertTrue("seeked key does not match", Arrays.equals(getSomeKey(50),
+ Bytes.toBytes(readKey)));
+
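+ // Two consecutive seeks to the same key must leave the scanner on the same value.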
+ scanner.seekTo(new byte[0]);
+ ByteBuffer val1 = scanner.getValue();
+ scanner.seekTo(new byte[0]);
+ ByteBuffer val2 = scanner.getValue();
+ assertTrue(Arrays.equals(Bytes.toBytes(val1), Bytes.toBytes(val2)));
+
+ reader.close();
+ fin.close();
+ fs.delete(ncTFile, true);
+ }
+
+ public void testTFileFeatures() throws IOException {
+ basicWithSomeCodec("none");
+ basicWithSomeCodec("gz");
+ }
+
+ private void writeNumMetablocks(Writer writer, int n) {
+ for (int i = 0; i < n; i++) {
+ writer.appendMetaBlock("TfileMeta" + i, ("something to test" + i).getBytes());
+ }
+ }
+
+ private void someTestingWithMetaBlock(Writer writer) {
+ writeNumMetablocks(writer, 10);
+ }
+
+ private void readNumMetablocks(Reader reader, int n) throws IOException {
+ for (int i = 0; i < n; i++) {
+ ByteBuffer b = reader.getMetaBlock("TfileMeta" + i);
+ byte [] found = Bytes.toBytes(b);
+ assertTrue("failed to match metadata", Arrays.equals(
+ ("something to test" + i).getBytes(), found));
+ }
+ }
+
+ private void someReadingWithMetaBlock(Reader reader) throws IOException {
+ readNumMetablocks(reader, 10);
+ }
+
+ private void metablocks(final String compress) throws Exception {
+ Path mFile = new Path(ROOT_DIR, "meta.tfile");
+ FSDataOutputStream fout = createFSOutput(mFile);
+ Writer writer = new Writer(fout, minBlockSize, compress, null);
+ someTestingWithMetaBlock(writer);
+ writer.close();
+ fout.close();
+ FSDataInputStream fin = fs.open(mFile);
+ Reader reader = new Reader(fs.open(mFile), this.fs.getFileStatus(mFile)
+ .getLen(), null);
+ reader.loadFileInfo();
+ // No data -- this should return false.
+ assertFalse(reader.getScanner().seekTo());
+ someReadingWithMetaBlock(reader);
+ fs.delete(mFile, true);
+ reader.close();
+ fin.close();
+ }
+
+ // Test meta blocks for hfiles.
+ public void testMetaBlocks() throws Exception {
+ metablocks("none");
+ metablocks("gz");
+ }
+
+ /**
+ * Make sure the ordinals for our compression algorithms don't change on us.
+ */
+ public void testCompressionOrdinance() {
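+ // The ordinal is what identifies the algorithm in written files, so the enum
+ // order must stay fixed.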
+ assertTrue(Compression.Algorithm.LZO.ordinal() == 0);
+ assertTrue(Compression.Algorithm.GZ.ordinal() == 1);
+ assertTrue(Compression.Algorithm.NONE.ordinal() == 2);
+ }
+
+
+ public void testComparator() throws IOException {
+ Path mFile = new Path(ROOT_DIR, "meta.tfile");
+ FSDataOutputStream fout = createFSOutput(mFile);
+ Writer writer = new Writer(fout, minBlockSize, "none",
+ new RawComparator<byte []>() {
+ @Override
+ public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2,
+ int l2) {
+ return -Bytes.compareTo(b1, s1, l1, b2, s2, l2);
+
+ }
+ @Override
+ public int compare(byte[] o1, byte[] o2) {
+ return compare(o1, 0, o1.length, o2, 0, o2.length);
+ }
+ });
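+ // With the inverted comparator above, appending keys in descending byte order
+ // keeps them in the comparator's ascending order.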
+ writer.append("3".getBytes(), "0".getBytes());
+ writer.append("2".getBytes(), "0".getBytes());
+ writer.append("1".getBytes(), "0".getBytes());
+ writer.close();
+ }
+}
\ No newline at end of file
Added: hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/io/hfile/TestHFilePerformance.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/io/hfile/TestHFilePerformance.java?rev=747672&view=auto
==============================================================================
--- hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/io/hfile/TestHFilePerformance.java (added)
+++ hadoop/hbase/trunk/src/test/org/apache/hadoop/hbase/io/hfile/TestHFilePerformance.java Wed Feb 25 05:59:26 2009
@@ -0,0 +1,387 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.io.hfile;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.text.DateFormat;
+import java.text.SimpleDateFormat;
+import java.util.Random;
+
+import junit.framework.TestCase;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.compress.CompressionCodec;
+import org.apache.hadoop.io.compress.GzipCodec;
+import org.apache.hadoop.io.compress.LzoCodec;
+
+/**
+ * Set of long-running tests to measure performance of HFile.
+ * <p>
+ * Copied from
+ * <a href="https://issues.apache.org/jira/browse/HADOOP-3315">hadoop-3315 tfile</a>.
+ * Remove after tfile is committed and use the tfile version of this class
+ * instead.</p>
+ */
+public class TestHFilePerformance extends TestCase {
+ private static String ROOT_DIR =
+ System.getProperty("test.build.data", "/tmp/TestHFilePerformance");
+ private FileSystem fs;
+ private Configuration conf;
+ private long startTimeEpoch;
+ private long finishTimeEpoch;
+ private DateFormat formatter;
+
+ @Override
+ public void setUp() throws IOException {
+ conf = new Configuration();
+ fs = FileSystem.get(conf);
+ formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+ }
+
+ public void startTime() {
+ startTimeEpoch = System.currentTimeMillis();
+ System.out.println(formatTime() + " Started timing.");
+ }
+
+ public void stopTime() {
+ finishTimeEpoch = System.currentTimeMillis();
+ System.out.println(formatTime() + " Stopped timing.");
+ }
+
+ public long getIntervalMillis() {
+ return finishTimeEpoch - startTimeEpoch;
+ }
+
+ public void printlnWithTimestamp(String message) {
+ System.out.println(formatTime() + " " + message);
+ }
+
+ /*
+ * Format millis as a date/time string.
+ */
+ public String formatTime(long millis) {
+ return formatter.format(millis);
+ }
+
+ public String formatTime(){
+ return formatTime(System.currentTimeMillis());
+ }
+
+ private FSDataOutputStream createFSOutput(Path name) throws IOException {
+ if (fs.exists(name))
+ fs.delete(name, true);
+ FSDataOutputStream fout = fs.create(name);
+ return fout;
+ }
+
+ //TODO have multiple ways of generating keys/values, e.g. dictionary words,
+ //TODO to get a sample of compressible data; for now 1 out of 3 values is random and
+ // keys are all random.
+
+ private static class KeyValueGenerator {
+ Random keyRandomizer;
+ Random valueRandomizer;
+ long randomValueRatio = 3; // 1 out of randomValueRatio generated values will be random.
+ long valueSequence = 0 ;
+
+
+ KeyValueGenerator() {
+ keyRandomizer = new Random(0L); //TODO with seed zero
+ valueRandomizer = new Random(1L); //TODO with seed one
+ }
+
+ // Key is always random now.
+ void getKey(byte[] key) {
+ keyRandomizer.nextBytes(key);
+ }
+
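+ // Only every randomValueRatio-th value is re-randomized; the buffer is reused
+ // in between, so repeated values give the compression codecs something to compress.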
+ void getValue(byte[] value) {
+ if (valueSequence % randomValueRatio == 0)
+ valueRandomizer.nextBytes(value);
+ valueSequence++;
+ }
+ }
+
+ /**
+ *
+ * @param fileType "HFile" or "SequenceFile"
+ * @param keyLength length of each key, in bytes
+ * @param valueLength length of each value, in bytes
+ * @param codecName "none", "lzo", "gz"
+ * @param rows number of rows to be written.
+ * @param writeMethod used for HFile only.
+ * @param minBlockSize used for HFile only.
+ * @throws IOException
+ */
+ //TODO writeMethod: implement multiple ways of writing e.g. A) known length (no chunk) B) using a buffer and streaming (for many chunks).
+ public void timeWrite(String fileType, int keyLength, int valueLength,
+ String codecName, long rows, String writeMethod, int minBlockSize)
+ throws IOException {
+ System.out.println("File Type: " + fileType);
+ System.out.println("Writing " + fileType + " with codecName: " + codecName);
+ long totalBytesWritten = 0;
+
+
+ //Using separate randomizer for key/value with seeds matching Sequence File.
+ byte[] key = new byte[keyLength];
+ byte[] value = new byte[valueLength];
+ KeyValueGenerator generator = new KeyValueGenerator();
+
+ startTime();
+
+ Path path = new Path(ROOT_DIR, fileType + ".Performance");
+ System.out.println(ROOT_DIR + path.getName());
+ FSDataOutputStream fout = createFSOutput(path);
+
+ if ("HFile".equals(fileType)){
+ System.out.println("HFile write method: ");
+ HFile.Writer writer =
+ new HFile.Writer(fout, minBlockSize, codecName, null);
+
+ // Writing value in one shot.
+ for (long l=0 ; l<rows ; l++ ) {
+ generator.getKey(key);
+ generator.getValue(value);
+ writer.append(key, value);
+ totalBytesWritten += key.length;
+ totalBytesWritten += value.length;
+ }
+ writer.close();
+ } else if ("SequenceFile".equals(fileType)){
+ CompressionCodec codec = null;
+ if ("lzo".equals(codecName))
+ codec = new LzoCodec();
+ else if ("gz".equals(codecName))
+ codec = new GzipCodec();
+ else if (!"none".equals(codecName))
+ throw new IOException("Codec not supported.");
+
+ SequenceFile.Writer writer;
+
+ //TODO
+ //JobConf conf = new JobConf();
+
+ if (!"none".equals(codecName))
+ writer = SequenceFile.createWriter(conf, fout, BytesWritable.class,
+ BytesWritable.class, SequenceFile.CompressionType.BLOCK, codec);
+ else
+ writer = SequenceFile.createWriter(conf, fout, BytesWritable.class,
+ BytesWritable.class, SequenceFile.CompressionType.NONE, null);
+
+ BytesWritable keyBsw;
+ BytesWritable valBsw;
+ for (long l=0 ; l<rows ; l++ ) {
+
+ generator.getKey(key);
+ keyBsw = new BytesWritable(key);
+ totalBytesWritten += keyBsw.getSize();
+
+ generator.getValue(value);
+ valBsw = new BytesWritable(value);
+ writer.append(keyBsw, valBsw);
+ totalBytesWritten += valBsw.getSize();
+ }
+
+ writer.close();
+ } else
+ throw new IOException("File Type is not supported");
+
+ fout.close();
+ stopTime();
+
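+ // Rates use integer arithmetic: (bytes / elapsed ms) * 1000 gives bytes/sec,
+ // then /1024/1024 converts to MB/s.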
+ printlnWithTimestamp("Data written: ");
+ printlnWithTimestamp(" rate = " +
+ totalBytesWritten / getIntervalMillis() * 1000 / 1024 / 1024 + "MB/s");
+ printlnWithTimestamp(" total = " + totalBytesWritten + "B");
+
+ printlnWithTimestamp("File written: ");
+ printlnWithTimestamp(" rate = " +
+ fs.getFileStatus(path).getLen() / getIntervalMillis() * 1000 / 1024 / 1024 + "MB/s");
+ printlnWithTimestamp(" total = " + fs.getFileStatus(path).getLen() + "B");
+ }
+
+ public void timeReading(String fileType, int keyLength, int valueLength,
+ long rows, int method) throws IOException {
+ System.out.println("Reading file of type: " + fileType);
+ Path path = new Path(ROOT_DIR, fileType + ".Performance");
+ System.out.println("Input file size: " + fs.getFileStatus(path).getLen());
+ long totalBytesRead = 0;
+
+
+ ByteBuffer val;
+
+ ByteBuffer key;
+
+ startTime();
+ FSDataInputStream fin = fs.open(path);
+
+ if ("HFile".equals(fileType)){
+ HFile.Reader reader = new HFile.Reader(fs.open(path),
+ fs.getFileStatus(path).getLen(), null);
+ reader.loadFileInfo();
+ System.out.println(reader);
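+ // Only a full sequential scan is implemented so far; every value of 'method'
+ // falls through to the same scanner loop.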
+ switch (method) {
+
+ case 0:
+ case 1:
+ default:
+ {
+ HFileScanner scanner = reader.getScanner();
+ scanner.seekTo();
+ for (long l=0 ; l<rows ; l++ ) {
+ key = scanner.getKey();
+ val = scanner.getValue();
+ totalBytesRead += key.limit() + val.limit();
+ scanner.next();
+ }
+ }
+ break;
+ }
+ } else if("SequenceFile".equals(fileType)){
+
+ SequenceFile.Reader reader;
+ reader = new SequenceFile.Reader(fs, path, new Configuration());
+
+ if (reader.getCompressionCodec() != null) {
+ printlnWithTimestamp("Compression codec class: " + reader.getCompressionCodec().getClass());
+ } else
+ printlnWithTimestamp("Compression codec class: " + "none");
+
+ BytesWritable keyBsw = new BytesWritable();
+ BytesWritable valBsw = new BytesWritable();
+
+ for (long l=0 ; l<rows ; l++ ) {
+ reader.next(keyBsw, valBsw);
+ totalBytesRead += keyBsw.getSize() + valBsw.getSize();
+ }
+ reader.close();
+
+ //TODO make a tests for other types of SequenceFile reading scenarios
+
+ } else {
+ throw new IOException("File Type not supported.");
+ }
+
+
+ //printlnWithTimestamp("Closing reader");
+ fin.close();
+ stopTime();
+ //printlnWithTimestamp("Finished close");
+
+ printlnWithTimestamp("Finished in " + getIntervalMillis() + "ms");
+ printlnWithTimestamp("Data read: ");
+ printlnWithTimestamp(" rate = " +
+ totalBytesRead / getIntervalMillis() * 1000 / 1024 / 1024 + "MB/s");
+ printlnWithTimestamp(" total = " + totalBytesRead + "B");
+
+ printlnWithTimestamp("File read: ");
+ printlnWithTimestamp(" rate = " +
+ fs.getFileStatus(path).getLen() / getIntervalMillis() * 1000 / 1024 / 1024 + "MB/s");
+ printlnWithTimestamp(" total = " + fs.getFileStatus(path).getLen() + "B");
+
+ //TODO uncomment this for the final commit so the test file is removed.
+ //fs.delete(path, true);
+ }
+
+ public void testRunComparisons() throws IOException {
+
+ int keyLength = 100; // 100B
+ int valueLength = 5*1024; // 5KB
+ int minBlockSize = 10*1024*1024; // 10MB
+ int rows = 10000;
+
+ System.out.println("****************************** Sequence File *****************************");
+
+ timeWrite("SequenceFile", keyLength, valueLength, "none", rows, null, minBlockSize);
+ System.out.println("\n+++++++\n");
+ timeReading("SequenceFile", keyLength, valueLength, rows, -1);
+
+ System.out.println("");
+ System.out.println("----------------------");
+ System.out.println("");
+
+ /* DISABLED LZO
+ timeWrite("SequenceFile", keyLength, valueLength, "lzo", rows, null, minBlockSize);
+ System.out.println("\n+++++++\n");
+ timeReading("SequenceFile", keyLength, valueLength, rows, -1);
+
+ System.out.println("");
+ System.out.println("----------------------");
+ System.out.println("");
+ */
+
+ // SequenceFile gzip needs the native hadoop libs, so skip gracefully if they
+ // are not available.
+ try {
+ timeWrite("SequenceFile", keyLength, valueLength, "gz", rows, null,
+ minBlockSize);
+ System.out.println("\n+++++++\n");
+ timeReading("SequenceFile", keyLength, valueLength, rows, -1);
+ } catch (IllegalArgumentException e) {
+ System.out.println("Skipping sequencefile gz: " + e.getMessage());
+ }
+
+
+ System.out.println("\n\n\n");
+ System.out.println("****************************** HFile *****************************");
+
+ timeWrite("HFile", keyLength, valueLength, "none", rows, null, minBlockSize);
+ System.out.println("\n+++++++\n");
+ timeReading("HFile", keyLength, valueLength, rows, 0 );
+
+ System.out.println("");
+ System.out.println("----------------------");
+ System.out.println("");
+/* DISABLED LZO
+ timeWrite("HFile", keyLength, valueLength, "lzo", rows, null, minBlockSize);
+ System.out.println("\n+++++++\n");
+ timeReading("HFile", keyLength, valueLength, rows, 0 );
+ System.out.println("\n+++++++\n");
+ timeReading("HFile", keyLength, valueLength, rows, 1 );
+ System.out.println("\n+++++++\n");
+ timeReading("HFile", keyLength, valueLength, rows, 2 );
+
+ System.out.println("");
+ System.out.println("----------------------");
+ System.out.println("");
+*/
+ timeWrite("HFile", keyLength, valueLength, "gz", rows, null, minBlockSize);
+ System.out.println("\n+++++++\n");
+ timeReading("HFile", keyLength, valueLength, rows, 0 );
+
+ System.out.println("\n\n\n\nNotes: ");
+ System.out.println(" * Timing includes open/closing of files.");
+ System.out.println(" * Timing includes reading both Key and Value");
+ System.out.println(" * Data is generated as random bytes. Other methods e.g. using " +
+ "dictionary with care for distributation of words is under development.");
+ System.out.println(" * Timing of write currently, includes random value/key generations. " +
+ "Which is the same for Sequence File and HFile. Another possibility is to generate " +
+ "test data beforehand");
+ System.out.println(" * We need to mitigate cache effect on benchmark. We can apply several " +
+ "ideas, for next step we do a large dummy read between benchmark read to dismantle " +
+ "caching of data. Renaming of file may be helpful. We can have a loop that reads with" +
+ " the same method several times and flood cache every time and average it to get a" +
+ " better number.");
+ }
+}
\ No newline at end of file