You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by li...@apache.org on 2012/11/29 01:05:52 UTC
svn commit: r1415003 - in
/hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/benchmarks:
Benchmark.java GetBenchmark.java RegionGetBenchmark.java
Author: liyin
Date: Thu Nov 29 00:05:51 2012
New Revision: 1415003
URL: http://svn.apache.org/viewvc?rev=1415003&view=rev
Log:
[HBASE-7068] Enhancements to get benchmark and adding HRegion get benchmark
Author: kranganathan
Summary:
This benchmark does the following:
- Create 20 regions
- Populate (bulk load) each region with 1M key-values, 90 bytes each
- Read directly using the HRegion as a library using a variable number of threads (cached workload)
Was able to get 1.34M get ops/sec at peak using 20 regions and 110 threads.
Improved the get benchmark which was using a lot of CPU in generating random keys to read.
Test Plan: Ran the benchmark.
Reviewers: kannan, liyintang, aaiyer
Reviewed By: liyintang
CC: hbase-eng@
Differential Revision: https://phabricator.fb.com/D635918
Added:
hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/benchmarks/RegionGetBenchmark.java
Modified:
hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/benchmarks/Benchmark.java
hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/benchmarks/GetBenchmark.java
Modified: hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/benchmarks/Benchmark.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/benchmarks/Benchmark.java?rev=1415003&r1=1415002&r2=1415003&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/benchmarks/Benchmark.java (original)
+++ hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/benchmarks/Benchmark.java Thu Nov 29 00:05:51 2012
@@ -1,6 +1,7 @@
package org.apache.hadoop.hbase.benchmarks;
import java.io.IOException;
+import java.util.List;
import java.util.NavigableMap;
import java.util.Random;
@@ -128,7 +129,7 @@ public abstract class Benchmark {
public HTable createTableAndLoadData(byte[] tableName, int kvSize,
long numKVs, boolean bulkLoad) throws IOException {
return createTableAndLoadData(tableName, "cf1", kvSize,
- numKVs, 1, bulkLoad);
+ numKVs, 1, bulkLoad, null);
}
/**
@@ -140,12 +141,13 @@ public abstract class Benchmark {
* @param numKVs number of kv's to write into the table and cf
* @param numRegionsPerRS number of regions per RS, 0 for one region
* @param bulkLoad if true, create HFile and load. Else do puts.
+ * @param keysWritten if not null, the row keys populated by the bulk load are added to this list.
* @return HTable instance to the table just created
* @throws IOException
*/
public HTable createTableAndLoadData(byte[] tableName, String cfNameStr,
- int kvSize, long numKVs, int numRegionsPerRS, boolean bulkLoad)
- throws IOException {
+ int kvSize, long numKVs, int numRegionsPerRS, boolean bulkLoad,
+ List<byte[]> keysWritten) throws IOException {
HTable htable = null;
try {
htable = new HTable(conf, tableName);
@@ -189,15 +191,46 @@ public abstract class Benchmark {
LOG.error("Failed to create table", e);
System.exit(0);
}
- }
+ }
+
+ // get the table again
+ if (htable == null) {
+ try {
+ htable = new HTable(conf, tableName);
+ LOG.info("Table " + new String(tableName) + " exists, skipping create.");
+ } catch (IOException e) {
+ LOG.info("Table " + new String(tableName) + " does not exist.");
+ }
+ }
// check if the table has any data
Scan scan = new Scan();
scan.addFamily(Bytes.toBytes(cfNameStr));
ResultScanner scanner = htable.getScanner(scan);
Result result = scanner.next();
+
+ // if it has data we are done. Regenerate the keys we would have written if
+ // needed
if (result != null && !result.isEmpty()) {
LOG.info("Table " + new String(tableName) + " has data, skipping load");
+ if (keysWritten != null) {
+ NavigableMap<HRegionInfo, HServerAddress> regionsToRS =
+ htable.getRegionsInfo();
+ // regenerate the row keys that the bulk load would have written per region
+ long numKVsInRegion = Math.round(numKVs * 1.0 / numRegions);
+ for (HRegionInfo hRegionInfo : regionsToRS.keySet()) {
+ // skip the first region which has an empty start key
+ if ("".equals(new String(hRegionInfo.getStartKey()))) {
+ continue;
+ }
+ long startKey = getLongFromRowKey(hRegionInfo.getStartKey());
+ long rowID = startKey;
+ for (; rowID < startKey + numKVsInRegion; rowID++) {
+ byte[] row = getRowKeyFromLong(rowID);
+ keysWritten.add(row);
+ }
+ }
+ }
return htable;
}
LOG.info("Table " + new String(tableName) + " has no data, loading");
@@ -226,7 +259,7 @@ public abstract class Benchmark {
Path hbaseRootDir = new Path(this.conf.get(HConstants.HBASE_DIR));
Path basedir = new Path(hbaseRootDir, new String(tableName));
bulkLoadDataForRegion(fs, basedir, hRegionInfo, regionServer,
- cfNameStr, kvSize, numKVsInRegion);
+ cfNameStr, kvSize, numKVsInRegion, keysWritten);
}
}
else {
@@ -254,9 +287,10 @@ public abstract class Benchmark {
* Create a HFile and bulk load it for a given region
* @throws IOException
*/
- private void bulkLoadDataForRegion(FileSystem fs, Path basedir,
+ public void bulkLoadDataForRegion(FileSystem fs, Path basedir,
HRegionInfo hRegionInfo, HRegionInterface regionServer, String cfNameStr,
- int kvSize, long numKVsInRegion) throws IOException {
+ int kvSize, long numKVsInRegion, List<byte[]> keysWritten)
+ throws IOException {
// create an hfile
Path hFile = new Path(basedir, "hfile." + hRegionInfo.getEncodedName() +
"." + System.currentTimeMillis());
@@ -273,6 +307,7 @@ public abstract class Benchmark {
byte[] row = getRowKeyFromLong(rowID);
writer.append(new KeyValue(row, family, Bytes.toBytes(rowID),
System.currentTimeMillis(), value));
+ if (keysWritten != null) keysWritten.add(row);
}
writer.close();
LOG.info("Done creating data file: " + hFile.getName() +
@@ -283,17 +318,19 @@ public abstract class Benchmark {
(new String(hRegionInfo.getEndKey())).trim() + ")");
// bulk load the file
- regionServer.bulkLoadHFile(hFile.toString(), hRegionInfo.getRegionName(),
- Bytes.toBytes(cfNameStr), true);
- LOG.info("Done bulk-loading data file [" + hFile.getName() +
- "] for region [" + hRegionInfo.getEncodedName() + "]");
+ if (regionServer != null) {
+ regionServer.bulkLoadHFile(hFile.toString(), hRegionInfo.getRegionName(),
+ Bytes.toBytes(cfNameStr), true);
+ LOG.info("Done bulk-loading data file [" + hFile.getName() +
+ "] for region [" + hRegionInfo.getEncodedName() + "]");
+ }
}
- protected byte[] getRowKeyFromLong(long l) {
+ public static byte[] getRowKeyFromLong(long l) {
return Bytes.toBytes(String.format("%20d", l));
}
- protected long getLongFromRowKey(byte[] rowKey) {
+ public static long getLongFromRowKey(byte[] rowKey) {
return Long.parseLong((new String(rowKey)).trim());
}
Modified: hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/benchmarks/GetBenchmark.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/benchmarks/GetBenchmark.java?rev=1415003&r1=1415002&r2=1415003&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/benchmarks/GetBenchmark.java (original)
+++ hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/benchmarks/GetBenchmark.java Thu Nov 29 00:05:51 2012
@@ -18,12 +18,14 @@ public class GetBenchmark extends Benchm
public static final Log LOG = LogFactory.getLog(GetBenchmark.class);
static byte[] tableName = Bytes.toBytes("bench.GetFromMemory");
static String cfName = "cf1";
- private static Integer[] CLIENT_THREADS = { 40, 50, 60, 70, 80, 90, 100 };
- private static Integer[] NUM_CONNECTIONS = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+ private static Integer[] CLIENT_THREADS =
+ { 70, 80, 90, 100, 110, 120, 130, 140, 150, 160, 170, 180, 190, 200 };
+ private static Integer[] NUM_CONNECTIONS = { 5, 6, 7, 8, 9, 10 };
public static Configuration[] connCountConfs = new Configuration[100];
public static final int numRegionsPerRS = 10;
public static final int kvSize = 50;
public static int numKVs = 1000000;
+ static List<byte[]> keysWritten = new ArrayList<byte[]>();
/**
* Initialize the benchmark results tracking and output.
@@ -41,7 +43,8 @@ public class GetBenchmark extends Benchm
public void runBenchmark() throws Throwable {
// populate the table, bulk load it
createTableAndLoadData(tableName, cfName, kvSize, numKVs, numRegionsPerRS,
- true);
+ true, keysWritten);
+ System.out.println("Total kvs = " + keysWritten.size());
// warm block cache, force jit compilation
System.out.println("Warming blockcache and forcing JIT compilation...");
runExperiment("warmup-", false, 1, 100*1000, 1);
@@ -147,15 +150,14 @@ public class GetBenchmark extends Benchm
try {
// create a new HTable instance
HTable htable = new HTable(conf, tableName);
- long rowKey = 0;
+ byte[] rowKey = null;
// number of reads we have performed
long numSuccessfulGets = 0;
while (numSuccessfulGets < numGetsToPerform) {
// pick a random key from the keys written during the load
- rowKey = startKey + random.nextInt(endKey - startKey);
+ rowKey = keysWritten.get(random.nextInt(keysWritten.size()));
// keys are assumed to be 20 chars
- Get get =
- new Get(Bytes.toBytes(String.format("%20d", rowKey)));
+ Get get = new Get(rowKey);
get.addFamily(cf);
// time the actual get
long t1 = System.currentTimeMillis();
Added: hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/benchmarks/RegionGetBenchmark.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/benchmarks/RegionGetBenchmark.java?rev=1415003&view=auto
==============================================================================
--- hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/benchmarks/RegionGetBenchmark.java (added)
+++ hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/benchmarks/RegionGetBenchmark.java Thu Nov 29 00:05:51 2012
@@ -0,0 +1,234 @@
+package org.apache.hadoop.hbase.benchmarks;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.client.Get;
+import org.apache.hadoop.hbase.regionserver.HRegion;
+import org.apache.hadoop.hbase.regionserver.wal.HLog;
+import org.apache.hadoop.hbase.util.Bytes;
+
+/**
+ * Benchmark that measures get throughput when reading directly from
+ * {@link HRegion} instances used as a library, for varying numbers of reader
+ * threads and regions.
+ *
+ * <p>A single data file is written for one region descriptor; that descriptor
+ * is then opened once per benchmarked region and the reader threads are
+ * assigned to the opened instances round-robin.
+ */
+public class RegionGetBenchmark extends Benchmark {
+  public static final Log LOG = LogFactory.getLog(RegionGetBenchmark.class);
+  static String cfName = "cf1";
+  // thread counts to benchmark
+  private static Integer[] NUM_THREADS = { 50, 60, 70, 80, 90, 100, 110, 120 };
+  // region counts to benchmark
+  private static Integer[] NUM_REGIONS = { 10, 15, 20 };
+  // NOTE(review): Benchmark also appears to use a field named conf; this
+  // declaration may hide it - confirm that is intended.
+  private Configuration conf;
+  private FileSystem fs;
+  private Path hbaseRootDir = null;
+  private Path oldLogDir;
+  private Path logDir;
+  private static final String tableName = "dummyTable";
+  public static final int kvSize = 50;
+  public static int numKVs = 1000000;
+  // row keys written while creating the data file; reader threads sample
+  // their get keys from this list
+  static List<byte[]> keysWritten = new ArrayList<byte[]>();
+
+  /**
+   * Creates the configuration and file system handles, derives the log
+   * directories from the HBase root dir and resets the root dir via
+   * {@link #reinitialize()}.
+   *
+   * @throws IOException if the file system cannot be obtained or reset
+   */
+  public RegionGetBenchmark() throws IOException {
+    conf = HBaseConfiguration.create();
+    fs = FileSystem.get(conf);
+    this.hbaseRootDir = new Path(this.conf.get(HConstants.HBASE_DIR));
+    this.oldLogDir =
+      new Path(this.hbaseRootDir, HConstants.HREGION_OLDLOGDIR_NAME);
+    this.logDir = new Path(this.hbaseRootDir, HConstants.HREGION_LOGDIR_NAME);
+
+    reinitialize();
+  }
+
+  /**
+   * Recursively deletes the configured HBase root directory if it exists and
+   * re-creates it empty. WARNING: everything under the root dir is removed.
+   *
+   * @throws IOException on file system errors
+   */
+  public void reinitialize() throws IOException {
+    if (fs.exists(this.hbaseRootDir)) {
+      fs.delete(this.hbaseRootDir, true);
+    }
+    Path rootdir = fs.makeQualified(new Path(conf.get(HConstants.HBASE_DIR)));
+    fs.mkdirs(rootdir);
+  }
+
+  /**
+   * Initialize the benchmark results tracking and output: one row per thread
+   * count and one column per region count.
+   */
+  public void initBenchmarkResults() {
+    List<String> header = new ArrayList<String>();
+    header.add("Threads");
+    for (int i = 0; i < NUM_REGIONS.length; i++) {
+      header.add(" " + NUM_REGIONS[i] + " regions");
+    }
+    benchmarkResults = new BenchmarkResults<Integer, Integer, Double>(
+      NUM_THREADS, NUM_REGIONS, " %5d", " %5.2f", header);
+  }
+
+  /**
+   * Writes the data file, opens the regions, runs a warmup pass and then one
+   * experiment for every (region count, thread count) combination.
+   *
+   * @throws Throwable if setup fails; an IOException from an individual
+   *         experiment is logged and the remaining experiments still run
+   */
+  public void runBenchmark() throws Throwable {
+    // cleanup old data
+    Path basedir = new Path(this.hbaseRootDir, tableName);
+    if (this.fs.exists(basedir)) {
+      if (!this.fs.delete(basedir, true)) {
+        throw new IOException("Failed remove of " + basedir);
+      }
+    }
+
+    // create some data to read
+    HLog wal = new HLog(FileSystem.get(conf), logDir, oldLogDir, conf, null);
+    HTableDescriptor htd = new HTableDescriptor(tableName);
+    HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toBytes(cfName));
+    hcd.setBlocksize(4 * 1024);
+    htd.addFamily(hcd);
+    HRegionInfo hRegionInfo =
+      new HRegionInfo(htd, getRowKeyFromLong(0), null, false);
+    // a null region server is passed, so the file is only created here and
+    // not bulk loaded through a region server
+    bulkLoadDataForRegion(fs, basedir, hRegionInfo, null, cfName, kvSize,
+      numKVs, keysWritten);
+
+    // setup all the regions: open as many as the largest configuration needs
+    int maxRegions = NUM_REGIONS[0];
+    for (int numRegion : NUM_REGIONS) {
+      if (numRegion > maxRegions) maxRegions = numRegion;
+    }
+    HRegion[] hRegions = new HRegion[maxRegions];
+    for (int i = 0; i < maxRegions; i++) {
+      hRegions[i] = HRegion.openHRegion(hRegionInfo, basedir, wal, this.conf);
+    }
+    // Arrays.toString is needed here: concatenating the array itself would
+    // print its identity (e.g. [Ljava.lang.Integer;@1f2e3d) instead of values
+    System.out.println("Total kvs = " + keysWritten.size() +
+      ", num regions = " + java.util.Arrays.toString(NUM_REGIONS));
+    // warm block cache, force jit compilation
+    System.out.println("Warming blockcache and forcing JIT compilation...");
+    runExperiment("warmup-", false, 1, 100*1000, hRegions, 1);
+
+    for (int numRegions : NUM_REGIONS) {
+      for (int numThreads : NUM_THREADS) {
+        try {
+          // read enough KVs to benchmark within a reasonable time
+          long numKVsToRead = 100*1000;
+          if (numThreads >= 5) numKVsToRead = 20*1000;
+          if (numThreads >= 40) numKVsToRead = 10*1000;
+          if (NUM_THREADS.length == 1) numKVsToRead = 10*1000*1000;
+          // run the experiment
+          runExperiment("t" + numThreads + "-", true, numThreads, numKVsToRead,
+            hRegions, numRegions);
+        } catch (IOException e) {
+          // keep going with the remaining configurations
+          LOG.error("Experiment failed for " + numThreads + " threads, " +
+            numRegions + " regions", e);
+        }
+      }
+    }
+  }
+
+  /**
+   * Runs one experiment: starts the reader threads, waits for them and
+   * records the summed ops/sec in the benchmark results.
+   *
+   * @param prefix name prefix for the reader threads
+   * @param printStats not read by this method
+   * @param numThreads number of reader threads to start
+   * @param numReadsPerThread number of gets each thread performs
+   * @param hRegions opened regions; must hold at least numRegions entries
+   * @param numRegions number of regions the threads are spread across
+   * @throws IOException declared for callers; not thrown by the visible body
+   */
+  public void runExperiment(String prefix, boolean printStats, int numThreads,
+      long numReadsPerThread, HRegion[] hRegions, int numRegions)
+  throws IOException {
+    // Prepare the read threads, assigning regions round-robin
+    RegionGetBenchMarkThread threads[] =
+      new RegionGetBenchMarkThread[numThreads];
+    for (int i = 0; i < numThreads; i++) {
+      threads[i] = new RegionGetBenchMarkThread(prefix+i,
+        Bytes.toBytes(tableName), Bytes.toBytes(cfName), 0, numKVs,
+        numReadsPerThread, hRegions[i%numRegions], true);
+    }
+    // start the read threads, each one times itself
+    for (int i = 0; i < numThreads; i++) {
+      threads[i].start();
+    }
+    // wait for all the threads and compute the total ops/sec
+    double totalOpsPerSec = 0;
+    int successThreads = 0;
+    boolean interrupted = false;
+    for (int i = 0; i < numThreads; i++) {
+      try {
+        threads[i].join();
+        totalOpsPerSec += threads[i].getOpsPerSecond();
+        successThreads++;
+      } catch (InterruptedException e) {
+        // remember the interrupt but keep joining the remaining threads
+        interrupted = true;
+        LOG.error("Exception in thread " + i, e);
+      }
+    }
+    if (interrupted) {
+      // restore the interrupt status for the caller
+      Thread.currentThread().interrupt();
+    }
+    System.out.println("Num threads = " + successThreads + ", " +
+      "performance = " + String.format("%5.2f", totalOpsPerSec) + " ops/sec");
+    // add to the benchmark results
+    benchmarkResults.addResult(numThreads, numRegions, totalOpsPerSec);
+  }
+
+  /**
+   * Thread that performs a given number of read operations and computes the
+   * number of read operations per second it was able to get.
+   */
+  public static class RegionGetBenchMarkThread extends Thread {
+    public static final long PRINT_INTERVAL = 20000;
+    String name;
+    byte[] table;
+    byte[] cf;
+    int startKey;
+    int endKey;
+    long numGetsToPerform;
+    HRegion hRegion;
+    // total time spent inside the get calls, in milliseconds
+    long timeTakenMillis = 0;
+    // number of gets that completed without an exception
+    long numSuccessfulGets = 0;
+    boolean debug = false;
+    Random random = new Random();
+
+    /**
+     * @param name label used in progress log lines
+     * @param table table name; stored but not read by this class
+     * @param cf column family added to every get
+     * @param startKey stored but not read by this class
+     * @param endKey stored but not read by this class
+     * @param numGetsToPerform number of gets to issue before exiting
+     * @param hRegion region the gets are issued against
+     * @param debug if true, progress is logged every PRINT_INTERVAL gets
+     */
+    public RegionGetBenchMarkThread(String name, byte[] table, byte[] cf,
+        int startKey, int endKey, long numGetsToPerform, HRegion hRegion,
+        boolean debug) {
+      this.name = name;
+      this.table = table;
+      this.cf = cf;
+      this.startKey = startKey;
+      this.endKey = endKey;
+      this.numGetsToPerform = numGetsToPerform;
+      this.hRegion = hRegion;
+      this.debug = debug;
+    }
+
+    /**
+     * Returns the number of ops/second based on the gets completed so far.
+     * Returns 0 when no time has been recorded yet, which avoids reporting
+     * Infinity/NaN for a thread that failed before timing any get.
+     */
+    public double getOpsPerSecond() {
+      if (timeTakenMillis <= 0) {
+        return 0.0;
+      }
+      // use the completed count so a thread that aborted early on an
+      // IOException does not overstate its throughput
+      return (numSuccessfulGets * 1.0 * 1000 / timeTakenMillis);
+    }
+
+    /**
+     * Issues gets for randomly chosen written keys until numGetsToPerform
+     * gets have completed, accumulating only the time spent in the get call.
+     * An IOException is logged and ends the thread.
+     */
+    @Override
+    public void run() {
+      try {
+        while (numSuccessfulGets < numGetsToPerform) {
+          // pick a random key among the keys written during the load
+          byte[] rowKey = keysWritten.get(random.nextInt(keysWritten.size()));
+          Get get = new Get(rowKey);
+          get.addFamily(cf);
+          // time the actual get
+          long t1 = System.currentTimeMillis();
+          hRegion.get(get, null);
+          timeTakenMillis += System.currentTimeMillis() - t1;
+          numSuccessfulGets++;
+          // print progress if needed
+          if (debug && numSuccessfulGets % PRINT_INTERVAL == 0) {
+            LOG.debug("[Thread-" + name + "] " +
+              "Num gets = " + numSuccessfulGets + "/" + numGetsToPerform +
+              ", current rate = " + String.format("%.2f", getOpsPerSecond()) +
+              " ops/sec");
+          }
+        }
+      } catch (IOException e) {
+        LOG.error("IOException while running read thread, will exit", e);
+      }
+    }
+  }
+
+  /**
+   * Entry point: resolves this class by name from the current stack frame and
+   * hands it to the shared benchmark runner.
+   */
+  public static void main(String[] args) throws Throwable {
+    String className =
+      Thread.currentThread().getStackTrace()[1].getClassName();
+    System.out.println("Running benchmark " + className);
+    @SuppressWarnings("unchecked")
+    Class<? extends Benchmark> benchmarkClass =
+      (Class<? extends Benchmark>)Class.forName(className);
+    Benchmark.benchmarkRunner(benchmarkClass, args);
+  }
+}