You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by li...@apache.org on 2012/11/29 01:05:52 UTC

svn commit: r1415003 - in /hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/benchmarks: Benchmark.java GetBenchmark.java RegionGetBenchmark.java

Author: liyin
Date: Thu Nov 29 00:05:51 2012
New Revision: 1415003

URL: http://svn.apache.org/viewvc?rev=1415003&view=rev
Log:
[HBASE-7068] Enhancements to get benchmark and adding HRegion get benchmark

Author: kranganathan

Summary:
This benchmark does the following:
- Create 20 regions
- Populate (bulk load) each region with 1M key-values, 90 bytes each
- Read directly using the HRegion as a library using a variable number of threads (cached workload)

Was able to get 1.34M get ops/sec at peak using 20 regions and 110 threads.

Improved the get benchmark which was using a lot of CPU in generating random keys to read.

Test Plan: Ran the benchmark.

Reviewers: kannan, liyintang, aaiyer

Reviewed By: liyintang

CC: hbase-eng@

Differential Revision: https://phabricator.fb.com/D635918

Added:
    hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/benchmarks/RegionGetBenchmark.java
Modified:
    hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/benchmarks/Benchmark.java
    hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/benchmarks/GetBenchmark.java

Modified: hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/benchmarks/Benchmark.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/benchmarks/Benchmark.java?rev=1415003&r1=1415002&r2=1415003&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/benchmarks/Benchmark.java (original)
+++ hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/benchmarks/Benchmark.java Thu Nov 29 00:05:51 2012
@@ -1,6 +1,7 @@
 package org.apache.hadoop.hbase.benchmarks;
 
 import java.io.IOException;
+import java.util.List;
 import java.util.NavigableMap;
 import java.util.Random;
 
@@ -128,7 +129,7 @@ public abstract class Benchmark {
   public HTable createTableAndLoadData(byte[] tableName, int kvSize, 
       long numKVs, boolean bulkLoad) throws IOException {
     return createTableAndLoadData(tableName, "cf1", kvSize, 
-        numKVs, 1, bulkLoad);
+        numKVs, 1, bulkLoad, null);
   }
   
   /**
@@ -140,12 +141,13 @@ public abstract class Benchmark {
    * @param numKVs number of kv's to write into the table and cf
    * @param numRegionsPerRS numer of regions per RS, 0 for one region
    * @param bulkLoad if true, create HFile and load. Else do puts.
+   * @param keysWritten if not null, filled in with the row keys populated in the bulk load case.
    * @return HTable instance to the table just created
    * @throws IOException
    */
   public HTable createTableAndLoadData(byte[] tableName, String cfNameStr, 
-      int kvSize, long numKVs, int numRegionsPerRS, boolean bulkLoad) 
-  throws IOException {
+      int kvSize, long numKVs, int numRegionsPerRS, boolean bulkLoad, 
+      List<byte[]> keysWritten) throws IOException {
     HTable htable = null;
     try {
       htable = new HTable(conf, tableName);
@@ -189,15 +191,46 @@ public abstract class Benchmark {
         LOG.error("Failed to create table", e);
         System.exit(0);
       }
-    } 
+    }
+    
+    // get the table again
+    if (htable == null) {
+      try {
+        htable = new HTable(conf, tableName);
+        LOG.info("Table " + new String(tableName) + " exists, skipping create.");
+      } catch (IOException e) {
+        LOG.info("Table " + new String(tableName) + " does not exist.");
+      }
+    }
     
     // check if the table has any data
     Scan scan = new Scan();
     scan.addFamily(Bytes.toBytes(cfNameStr));
     ResultScanner scanner = htable.getScanner(scan);
     Result result = scanner.next();
+    
+    // if it has data we are done. Regenerate the keys we would have written if 
+    // needed
     if (result != null && !result.isEmpty()) {
       LOG.info("Table " + new String(tableName) + " has data, skipping load");
+      if (keysWritten != null) {
+        NavigableMap<HRegionInfo, HServerAddress> regionsToRS = 
+          htable.getRegionsInfo();
+        // regenerate the row keys that the earlier bulk load wrote to each region
+        long numKVsInRegion = Math.round(numKVs * 1.0 / numRegions);
+        for (HRegionInfo hRegionInfo : regionsToRS.keySet()) {
+          // skip the first region which has an empty start key
+          if ("".equals(new String(hRegionInfo.getStartKey()))) {
+            continue;
+          }
+          long startKey = getLongFromRowKey(hRegionInfo.getStartKey());
+          long rowID = startKey;
+          for (; rowID < startKey + numKVsInRegion; rowID++) {
+            byte[] row = getRowKeyFromLong(rowID);
+            keysWritten.add(row);
+          }
+        }
+      }
       return htable;
     }
     LOG.info("Table " + new String(tableName) + " has no data, loading");
@@ -226,7 +259,7 @@ public abstract class Benchmark {
         Path hbaseRootDir = new Path(this.conf.get(HConstants.HBASE_DIR));
         Path basedir = new Path(hbaseRootDir, new String(tableName));
         bulkLoadDataForRegion(fs, basedir, hRegionInfo, regionServer, 
-            cfNameStr, kvSize, numKVsInRegion);
+            cfNameStr, kvSize, numKVsInRegion, keysWritten);
       }
     } 
     else {
@@ -254,9 +287,10 @@ public abstract class Benchmark {
    * Create a HFile and bulk load it for a given region
    * @throws IOException 
    */
-  private void bulkLoadDataForRegion(FileSystem fs, Path basedir, 
+  public void bulkLoadDataForRegion(FileSystem fs, Path basedir, 
       HRegionInfo hRegionInfo, HRegionInterface regionServer, String cfNameStr, 
-      int kvSize, long numKVsInRegion) throws IOException {
+      int kvSize, long numKVsInRegion, List<byte[]> keysWritten) 
+  throws IOException {
     // create an hfile
     Path hFile =  new Path(basedir, "hfile." + hRegionInfo.getEncodedName() + 
         "." + System.currentTimeMillis());
@@ -273,6 +307,7 @@ public abstract class Benchmark {
       byte[] row = getRowKeyFromLong(rowID);
       writer.append(new KeyValue(row, family, Bytes.toBytes(rowID), 
           System.currentTimeMillis(), value));
+      if (keysWritten != null) keysWritten.add(row);
     }
     writer.close();
     LOG.info("Done creating data file: " + hFile.getName() + 
@@ -283,17 +318,19 @@ public abstract class Benchmark {
         (new String(hRegionInfo.getEndKey())).trim() + ")");
 
     // bulk load the file
-    regionServer.bulkLoadHFile(hFile.toString(), hRegionInfo.getRegionName(), 
-        Bytes.toBytes(cfNameStr), true);
-    LOG.info("Done bulk-loading data file [" + hFile.getName() + 
-        "] for region [" + hRegionInfo.getEncodedName() + "]");
+    if (regionServer != null) {
+      regionServer.bulkLoadHFile(hFile.toString(), hRegionInfo.getRegionName(), 
+          Bytes.toBytes(cfNameStr), true);
+      LOG.info("Done bulk-loading data file [" + hFile.getName() + 
+          "] for region [" + hRegionInfo.getEncodedName() + "]");
+    }
   }
   
-  protected byte[] getRowKeyFromLong(long l) {
+  public static byte[] getRowKeyFromLong(long l) {
     return Bytes.toBytes(String.format("%20d", l));
   }
   
-  protected long getLongFromRowKey(byte[] rowKey) {
+  public static long getLongFromRowKey(byte[] rowKey) {
     return Long.parseLong((new String(rowKey)).trim());
   }
   

Modified: hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/benchmarks/GetBenchmark.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/benchmarks/GetBenchmark.java?rev=1415003&r1=1415002&r2=1415003&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/benchmarks/GetBenchmark.java (original)
+++ hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/benchmarks/GetBenchmark.java Thu Nov 29 00:05:51 2012
@@ -18,12 +18,14 @@ public class GetBenchmark extends Benchm
   public static final Log LOG = LogFactory.getLog(GetBenchmark.class);
   static byte[] tableName = Bytes.toBytes("bench.GetFromMemory");
   static String cfName = "cf1";
-  private static Integer[] CLIENT_THREADS = { 40, 50, 60, 70, 80, 90, 100 };
-  private static Integer[] NUM_CONNECTIONS = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
+  private static Integer[] CLIENT_THREADS = 
+    { 70, 80, 90, 100, 110, 120, 130, 140, 150, 160, 170, 180, 190, 200 };
+  private static Integer[] NUM_CONNECTIONS = { 5, 6, 7, 8, 9, 10 };
   public static Configuration[] connCountConfs = new Configuration[100];
   public static final int numRegionsPerRS = 10;
   public static final int kvSize = 50;
   public static int numKVs = 1000000;
+  static List<byte[]> keysWritten = new ArrayList<byte[]>();
   
   /**
    * Initialize the benchmark results tracking and output.
@@ -41,7 +43,8 @@ public class GetBenchmark extends Benchm
   public void runBenchmark() throws Throwable {
     // populate the table, bulk load it
     createTableAndLoadData(tableName, cfName, kvSize, numKVs, numRegionsPerRS, 
-        true);
+        true, keysWritten);
+    System.out.println("Total kvs = " + keysWritten.size());
     // warm block cache, force jit compilation
     System.out.println("Warming blockcache and forcing JIT compilation...");
     runExperiment("warmup-", false, 1, 100*1000, 1);
@@ -147,15 +150,14 @@ public class GetBenchmark extends Benchm
       try {
         // create a new HTable instance
         HTable htable = new HTable(conf, tableName);
-        long rowKey = 0;
+        byte[] rowKey = null;
         // number of reads we have performed
         long numSuccessfulGets = 0;
         while (numSuccessfulGets < numGetsToPerform) {
           // create a random key in the range passed in
-          rowKey = startKey + random.nextInt(endKey - startKey);
+          rowKey = keysWritten.get(random.nextInt(keysWritten.size()));
           // keys are assumed to be 20 chars
-          Get get = 
-            new Get(Bytes.toBytes(String.format("%20d", rowKey)));
+          Get get = new Get(rowKey);
           get.addFamily(cf);
           // time the actual get
           long t1 = System.currentTimeMillis();

Added: hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/benchmarks/RegionGetBenchmark.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/benchmarks/RegionGetBenchmark.java?rev=1415003&view=auto
==============================================================================
--- hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/benchmarks/RegionGetBenchmark.java (added)
+++ hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/benchmarks/RegionGetBenchmark.java Thu Nov 29 00:05:51 2012
@@ -0,0 +1,234 @@
+package org.apache.hadoop.hbase.benchmarks;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.client.Get;
+import org.apache.hadoop.hbase.regionserver.HRegion;
+import org.apache.hadoop.hbase.regionserver.wal.HLog;
+import org.apache.hadoop.hbase.util.Bytes;
+
+public class RegionGetBenchmark extends Benchmark {
+  public static final Log LOG = LogFactory.getLog(RegionGetBenchmark.class);
+  static String cfName = "cf1";
+  private static Integer[] NUM_THREADS = { 50, 60, 70, 80, 90, 100, 110, 120 };
+  private static Integer[] NUM_REGIONS = { 10, 15, 20 };
+  private Configuration conf;
+  private FileSystem fs;
+  private Path hbaseRootDir = null;
+  private Path oldLogDir;
+  private Path logDir;
+  private static final String tableName = "dummyTable";
+  public static final int kvSize = 50;
+  public static int numKVs = 1000000;
+  static List<byte[]> keysWritten = new ArrayList<byte[]>();
+  
+  public RegionGetBenchmark() throws IOException {
+    conf = HBaseConfiguration.create();
+    fs = FileSystem.get(conf);
+    this.hbaseRootDir = new Path(this.conf.get(HConstants.HBASE_DIR));
+    this.oldLogDir = 
+      new Path(this.hbaseRootDir, HConstants.HREGION_OLDLOGDIR_NAME);
+    this.logDir = new Path(this.hbaseRootDir, HConstants.HREGION_LOGDIR_NAME);
+    
+    reinitialize();
+  }
+  
+  public void reinitialize() throws IOException {
+    if (fs.exists(this.hbaseRootDir)) {
+      fs.delete(this.hbaseRootDir, true);
+    }
+    Path rootdir = fs.makeQualified(new Path(conf.get(HConstants.HBASE_DIR)));
+    fs.mkdirs(rootdir);    
+  }
+
+  /**
+   * Initialize the benchmark results tracking and output.
+   */
+  public void initBenchmarkResults() {
+    List<String> header = new ArrayList<String>();
+    header.add("Threads");
+    for (int i = 0; i < NUM_REGIONS.length; i++) {
+      header.add("   " +  NUM_REGIONS[i] + " regions");
+    }
+    benchmarkResults = new BenchmarkResults<Integer, Integer, Double>(
+        NUM_THREADS, NUM_REGIONS, "     %5d", "   %5.2f", header);
+  }
+  
+  public void runBenchmark() throws Throwable {
+    // cleanup old data
+    Path basedir = new Path(this.hbaseRootDir, tableName);
+    if (this.fs.exists(basedir)) {
+      if (!this.fs.delete(basedir, true)) {
+        throw new IOException("Failed remove of " + basedir);
+      }
+    }
+    
+    // create some data to read
+    HLog wal = new HLog(FileSystem.get(conf), logDir, oldLogDir, conf, null);
+    HTableDescriptor htd = new HTableDescriptor(tableName);
+    HColumnDescriptor hcd = new HColumnDescriptor(Bytes.toBytes(cfName));
+    hcd.setBlocksize(4 * 1024);
+    htd.addFamily(hcd);
+    HRegionInfo hRegionInfo = 
+      new HRegionInfo(htd, getRowKeyFromLong(0), null, false);
+    bulkLoadDataForRegion(fs, basedir, hRegionInfo, null, cfName, kvSize, 
+        numKVs, keysWritten);
+
+    // setup all the regions
+    int maxRegions = NUM_REGIONS[0];
+    for (int numRegion : NUM_REGIONS) {
+      if (numRegion > maxRegions) maxRegions = numRegion;
+    }    
+    HRegion[] hRegions = new HRegion[maxRegions];
+    for (int i = 0; i < maxRegions; i++) {
+      hRegions[i] = HRegion.openHRegion(hRegionInfo, basedir, wal, this.conf);
+    }
+    System.out.println("Total kvs = " + keysWritten.size() + 
+        ", num regions = " + NUM_REGIONS);
+    // warm block cache, force jit compilation
+    System.out.println("Warming blockcache and forcing JIT compilation...");
+    runExperiment("warmup-", false, 1, 100*1000, hRegions, 1);
+
+    for (int numRegions : NUM_REGIONS) {
+      for (int numThreads : NUM_THREADS) {
+        try {
+          // read enough KVs to benchmark within a reasonable time
+          long numKVsToRead = 100*1000;
+          if (numThreads >= 5) numKVsToRead = 20*1000;
+          if (numThreads >= 40) numKVsToRead = 10*1000;
+          if (NUM_THREADS.length == 1) numKVsToRead = 10*1000*1000;
+          // run the experiment
+          runExperiment("t" + numThreads + "-", true, numThreads, numKVsToRead, 
+              hRegions, numRegions);
+        } catch (IOException e) { 
+          e.printStackTrace();
+        }
+      }
+    }
+  }
+
+  public void runExperiment(String prefix, boolean printStats, int numThreads, 
+      long numReadsPerThread, HRegion[] hRegions, int numRegions) 
+  throws IOException {
+    // Prepare the read threads
+    RegionGetBenchMarkThread threads[] = 
+      new RegionGetBenchMarkThread[numThreads];
+    for (int i = 0; i < numThreads; i++) {
+      threads[i] = new RegionGetBenchMarkThread(prefix+i, 
+          Bytes.toBytes(tableName), Bytes.toBytes(cfName), 0, numKVs, 
+          numReadsPerThread, hRegions[i%numRegions], true);
+    }
+    // start the read threads, each one times itself
+    for (int i = 0; i < numThreads; i++) {
+      threads[i].start();
+    }
+    // wait for all the threads and compute the total ops/sec
+    double totalOpsPerSec = 0;
+    int successThreads = 0;
+    for (int i = 0; i < numThreads; i++) {
+      try {
+        threads[i].join();
+        totalOpsPerSec += threads[i].getOpsPerSecond();
+        successThreads++;
+      } catch (InterruptedException e) {
+        LOG.error("Exception in thread " + i, e);
+      }
+    }
+    System.out.println("Num threads =  " + successThreads + ", " + 
+        "performance = " + String.format("%5.2f", totalOpsPerSec) + " ops/sec");
+    // add to the benchmark results
+    benchmarkResults.addResult(numThreads, numRegions, totalOpsPerSec);
+  }
+  
+  /**
+   * Thread that performs a given number of read operations and computes the 
+   * number of read operations per second it was able to get.
+   */
+  public static class RegionGetBenchMarkThread extends Thread {
+    public static final long PRINT_INTERVAL = 20000;
+    String name;
+    byte[] table;
+    byte[] cf;
+    int startKey;
+    int endKey;
+    long numGetsToPerform;
+    HRegion hRegion;
+    long timeTakenMillis = 0;
+    boolean debug = false;
+    Random random = new Random();
+    
+    public RegionGetBenchMarkThread(String name, byte[] table, byte[] cf, 
+        int startKey, int endKey, long numGetsToPerform, HRegion hRegion, 
+        boolean debug) {
+      this.name = name;
+      this.table = table;
+      this.cf = cf;
+      this.startKey = startKey;
+      this.endKey = endKey;
+      this.numGetsToPerform = numGetsToPerform;
+      this.hRegion = hRegion;
+      this.debug = debug;
+    }
+    
+    /**
+     * Returns the number of ops/second at the current moment.
+     */
+    public double getOpsPerSecond() {
+      return (numGetsToPerform * 1.0 * 1000 / timeTakenMillis);
+    }
+    
+    public void run() {
+      try {
+        byte[] rowKey = null;
+        // number of reads we have performed
+        long numSuccessfulGets = 0;
+        while (numSuccessfulGets < numGetsToPerform) {
+          // pick a random key from the list of keys that were bulk loaded
+          rowKey = keysWritten.get(random.nextInt(keysWritten.size()));
+          // keys are assumed to be 20 chars
+          Get get = new Get(rowKey);
+          get.addFamily(cf);
+          // time the actual get
+          long t1 = System.currentTimeMillis();
+          hRegion.get(get, null);
+          timeTakenMillis += System.currentTimeMillis() - t1;
+          numSuccessfulGets++;
+          // print progress if needed
+          if (debug && numSuccessfulGets % PRINT_INTERVAL == 0) {
+            double opsPerSec = 
+              (numSuccessfulGets * 1.0 * 1000 / timeTakenMillis);
+            LOG.debug("[Thread-" + name + "] " + "" +
+                "Num gets = " + numSuccessfulGets + "/" + numGetsToPerform + 
+                ", current rate = " + String.format("%.2f", opsPerSec) + 
+                " ops/sec");
+          }
+        }
+      } catch (IOException e) {
+        LOG.error("IOException while running read thread, will exit", e);
+      }
+    }
+  }
+
+  public static void main(String[] args) throws Throwable {
+    String className = 
+      Thread.currentThread().getStackTrace()[1].getClassName();
+    System.out.println("Running benchmark " + className);
+    @SuppressWarnings("unchecked")
+    Class<? extends Benchmark> benchmarkClass = 
+      (Class<? extends Benchmark>)Class.forName(className);
+    Benchmark.benchmarkRunner(benchmarkClass, args);
+  }
+}