You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by mb...@apache.org on 2012/10/23 14:51:38 UTC
svn commit: r1401280 - in
/hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/benchmarks:
Benchmark.java RegionScannerBenchmark.java ScanBenchmark.java
Author: mbautin
Date: Tue Oct 23 12:51:38 2012
New Revision: 1401280
URL: http://svn.apache.org/viewvc?rev=1401280&view=rev
Log:
[HBASE-6923] Improvements to scan benchmark
Author: kranganathan
Summary:
- Added a framework for running benchmark experiments
- Class to track results and reporting
- RegionScanner benchmark
Test Plan: Tested by running benchmarks
Reviewers: kannan
Reviewed By: kannan
CC: hbase-eng@
Differential Revision: https://phabricator.fb.com/D607625
Added:
hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/benchmarks/RegionScannerBenchmark.java
Modified:
hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/benchmarks/Benchmark.java
hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/benchmarks/ScanBenchmark.java
Modified: hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/benchmarks/Benchmark.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/benchmarks/Benchmark.java?rev=1401280&r1=1401279&r2=1401280&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/benchmarks/Benchmark.java (original)
+++ hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/benchmarks/Benchmark.java Tue Oct 23 12:51:38 2012
@@ -6,6 +6,7 @@ import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.loadtest.ColumnFamilyProperties;
import org.apache.hadoop.hbase.loadtest.HBaseUtils;
@@ -67,7 +68,7 @@ public abstract class Benchmark {
* @throws IOException
*/
public HTable createTableAndLoadData(byte[] tableName, int kvSize,
- long numKVs) throws IOException {
+ long numKVs, boolean flushData) throws IOException {
HTable htable = null;
try {
htable = new HTable(conf, tableName);
@@ -95,9 +96,9 @@ public abstract class Benchmark {
LOG.info("Loading data for the table");
String[] loadTestToolArgs = {
"-zk", "localhost",
- "-tn", "bench.ScanFromMemoryPerf",
+ "-tn", new String(tableName),
"-cf", familyProperty.familyName,
- "-write", "1:50",
+ "-write", "1:" + kvSize,
"-num_keys", "" + numKVs,
"-multiput",
"-compression", "NONE",
@@ -105,6 +106,18 @@ public abstract class Benchmark {
LoadTestTool.doMain(loadTestToolArgs);
LOG.info("Done loading data");
+ if (flushData) {
+ LOG.info("Flush of data requested");
+ HBaseAdmin admin = new HBaseAdmin(conf);
+ admin.flush(tableName);
+ try {
+ Thread.sleep(2*1000);
+ } catch (InterruptedException e) {
+ e.printStackTrace();
+ }
+ LOG.info("Done flushing data");
+ }
+
htable = new HTable(conf, tableName);
return htable;
}
Added: hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/benchmarks/RegionScannerBenchmark.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/benchmarks/RegionScannerBenchmark.java?rev=1401280&view=auto
==============================================================================
--- hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/benchmarks/RegionScannerBenchmark.java (added)
+++ hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/benchmarks/RegionScannerBenchmark.java Tue Oct 23 12:51:38 2012
@@ -0,0 +1,155 @@
+package org.apache.hadoop.hbase.benchmarks;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.io.hfile.HFile;
+import org.apache.hadoop.hbase.regionserver.HRegion;
+import org.apache.hadoop.hbase.regionserver.InternalScanner;
+import org.apache.hadoop.hbase.regionserver.wal.HLog;
+import org.apache.hadoop.hbase.util.Bytes;
+
+/**
+ * This test benchmarks the performance of scanning from the block cache when
+ * run as a library. Note that this will not have the overheads of reporting
+ * metrics, etc. This is the theoretical throughput the RegionScanner can
+ * achieve, so this is a bound for the scan throughput.
+ */
+public class RegionScannerBenchmark {
+ public static final Log LOG = LogFactory.getLog(RegionScannerBenchmark.class);
+ private Configuration conf;
+ private FileSystem fs;
+ private Path hbaseRootDir = null;
+ private Path oldLogDir;
+ private Path logDir;
+
+ public RegionScannerBenchmark() throws IOException {
+ conf = HBaseConfiguration.create();
+ fs = FileSystem.get(conf);
+ this.hbaseRootDir = new Path(this.conf.get(HConstants.HBASE_DIR));
+ this.oldLogDir =
+ new Path(this.hbaseRootDir, HConstants.HREGION_OLDLOGDIR_NAME);
+ this.logDir = new Path(this.hbaseRootDir, HConstants.HREGION_LOGDIR_NAME);
+
+ reinitialize();
+ }
+
+ public void reinitialize() throws IOException {
+ if (fs.exists(this.hbaseRootDir)) {
+ fs.delete(this.hbaseRootDir, true);
+ }
+ Path rootdir = fs.makeQualified(new Path(conf.get(HConstants.HBASE_DIR)));
+ fs.mkdirs(rootdir);
+ }
+
+ public void runBenchmark() throws Throwable {
+ String tableNameStr = "RegionScannerBenchmark";
+ HRegion region = createAndbulkLoadHRegion(tableNameStr, 50, 1000000);
+
+ // warm block cache and jvm jit compilation
+ for (int i = 0; i < 20; i++) {
+ scanHRegion(true, region);
+ }
+ }
+
+ public void scanHRegion(boolean printStats, HRegion region)
+ throws IOException {
+ // create the scan object with the right params
+ Scan scan = new Scan();
+ scan.setMaxVersions(1);
+
+ // create the RegionScanner object
+ InternalScanner scanner = region.getScanner(scan);
+
+ // do the scan
+ long numKVs = 0;
+ long numBytes = 0;
+ List<KeyValue> results = new ArrayList<KeyValue>();
+ long t1 = System.currentTimeMillis();
+ while (scanner.next(results) || results.size() > 0) {
+ for (KeyValue kv : results) {
+ numKVs++;
+ numBytes += kv.getLength();
+ }
+ results.clear();
+ }
+ long t2 = System.currentTimeMillis();
+ scanner.close();
+
+ if (printStats) {
+ double numBytesInMB = numBytes * 1.0 / (1024 * 1024);
+ double rate = numBytesInMB * (1000 * 1.0 / (t2 - t1));
+ System.out.println(
+ "Scan: from region scanner" +
+ ", kvs = " + numKVs +
+ ", bytes = " + String.format("%1$,.2f", numBytesInMB) + " MB" +
+ ", time = " + (t2 - t1) + " ms" +
+ ", rate = " + String.format("%1$,.2f", rate) + "MB/s"
+ );
+ }
+ }
+
+ public HRegion createAndbulkLoadHRegion(String tableNameStr,
+ int kvSize, int numKVs) throws IOException {
+ // cleanup old data
+ Path basedir = new Path(this.hbaseRootDir, tableNameStr);
+ deleteDir(basedir);
+
+ // setup the region
+ HLog wal = createWAL(this.conf);
+ HTableDescriptor htd = new HTableDescriptor(tableNameStr);
+ HColumnDescriptor a = new HColumnDescriptor(Bytes.toBytes("a"));
+ htd.addFamily(a);
+ HRegionInfo hri = new HRegionInfo(htd, null, null, false);
+ HRegion region = HRegion.openHRegion(hri, basedir, wal, this.conf);
+
+ // bulk load some data
+ Path f = new Path(basedir, "hfile");
+ HFile.Writer writer =
+ HFile.getWriterFactoryNoCache(conf).withPath(fs, f).create();
+ byte [] family =
+ hri.getTableDesc().getFamilies().iterator().next().getName();
+ byte [] row = Bytes.toBytes(tableNameStr);
+ byte [] value = new byte[kvSize];
+ (new Random()).nextBytes(value);
+ for (int i = 0; i < numKVs; i++) {
+ writer.append(new KeyValue(row, family, Bytes.toBytes(i),
+ System.currentTimeMillis(), value));
+ }
+ writer.close();
+ region.bulkLoadHFile(f.toString(), family);
+ return region;
+ }
+
+ private void deleteDir(final Path p) throws IOException {
+ if (this.fs.exists(p)) {
+ if (!this.fs.delete(p, true)) {
+ throw new IOException("Failed remove of " + p);
+ }
+ }
+ }
+ private HLog createWAL(final Configuration c) throws IOException {
+ HLog wal = new HLog(FileSystem.get(c), logDir, oldLogDir, c, null);
+ return wal;
+ }
+
+ public static void main(String[] args) throws Throwable {
+ RegionScannerBenchmark benchmark = new RegionScannerBenchmark();
+ benchmark.runBenchmark();
+ }
+}
Modified: hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/benchmarks/ScanBenchmark.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/benchmarks/ScanBenchmark.java?rev=1401280&r1=1401279&r2=1401280&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/benchmarks/ScanBenchmark.java (original)
+++ hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/benchmarks/ScanBenchmark.java Tue Oct 23 12:51:38 2012
@@ -2,26 +2,19 @@ package org.apache.hadoop.hbase.benchmar
import java.io.IOException;
import java.util.ArrayList;
-import java.util.HashMap;
import java.util.List;
-import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
-import org.apache.hadoop.hbase.TableExistsException;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.loadtest.ColumnFamilyProperties;
import org.apache.hadoop.hbase.loadtest.HBaseUtils;
import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.LoadTestTool;
-import org.apache.log4j.Level;
-import org.apache.log4j.Logger;
/**
* This test compares the performance of scan when all the data is in memory
@@ -33,12 +26,12 @@ public class ScanBenchmark extends Bench
private static final long PRINT_INTERVAL_KVS = 1000000;
private byte[] tableName = Bytes.toBytes("bench.ScanFromMemoryPerf");
private static Integer[] SET_CACHING_VALUES = {
-// 1000, 2000, 3000, 4000,
-// 5000, 6000, 7000, 8000,
+ 5000, 6000, 7000, 8000,
9000, 10000, 11000, 12000,
- 13000, 14000,
+ 13000, 14000, 15000, 16000,
+ 17000, 18000, 19000, 20000
};
- private static Integer[] SET_PREFETCH_VALUES = { 0 };
+ private static Integer[] SET_PREFETCH_VALUES = { 0 };
public void initBenchmarkResults() {
List<String> header = new ArrayList<String>();
@@ -52,9 +45,12 @@ public class ScanBenchmark extends Bench
public void runBenchmark() throws Throwable {
// populate the table
- createTableAndLoadData(tableName, 50, 1000000);
- // warm block cache
- runExperiment(false, 10000, 0);
+ createTableAndLoadData(tableName, 50, 1000000, true);
+ // warm block cache, force jit compilation
+ System.out.println("Warming blockcache and forcing JIT compilation...");
+ for (int i = 0; i < 20; i++) {
+ runExperiment(false, 10000, 0);
+ }
for (int caching : SET_CACHING_VALUES) {
for (int prefetch : SET_PREFETCH_VALUES) {
try {
@@ -93,11 +89,15 @@ public class ScanBenchmark extends Bench
if (numKVs > printAfterNumKVs) {
printAfterNumKVs += PRINT_INTERVAL_KVS;
- if (printStats) printStats(numKVs, numBytes, startTime, caching, prefetch);
+ if (printStats) {
+ printStats(numKVs, numBytes, startTime, caching, prefetch);
+ }
}
}
- if (printStats) printStats(numKVs, numBytes, startTime, caching, prefetch);
+ if (printStats) {
+ printStats(numKVs, numBytes, startTime, caching, prefetch);
+ }
scanner.close();
}