Posted to commits@hbase.apache.org by mb...@apache.org on 2012/10/23 14:51:38 UTC

svn commit: r1401280 - in /hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/benchmarks: Benchmark.java RegionScannerBenchmark.java ScanBenchmark.java

Author: mbautin
Date: Tue Oct 23 12:51:38 2012
New Revision: 1401280

URL: http://svn.apache.org/viewvc?rev=1401280&view=rev
Log:
[HBASE-6923] Improvements to scan benchmark

Author: kranganathan

Summary:
- Added a framework for running benchmark experiments
- Added a class to track and report benchmark results
- Added a RegionScanner benchmark (a usage sketch follows this summary)
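
A minimal usage sketch of the new benchmark entry point, for reference. The wrapper class name below is hypothetical; it simply calls the RegionScannerBenchmark main flow added in this commit, which bulk loads an HRegion and scans it repeatedly:

    import org.apache.hadoop.hbase.benchmarks.RegionScannerBenchmark;

    public class RunRegionScannerBenchmark {
      public static void main(String[] args) throws Throwable {
        // Equivalent to RegionScannerBenchmark.main(): creates a local HRegion,
        // bulk loads ~1M KVs via an HFile, then scans in a loop and prints
        // raw RegionScanner throughput (KVs, MB, ms, MB/s).
        RegionScannerBenchmark benchmark = new RegionScannerBenchmark();
        benchmark.runBenchmark();
      }
    }

ScanBenchmark picks up the other change by passing the new flushData flag to the base class, e.g. createTableAndLoadData(tableName, 50, 1000000, true).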

Test Plan: Tested by running benchmarks

Reviewers: kannan

Reviewed By: kannan

CC: hbase-eng@

Differential Revision: https://phabricator.fb.com/D607625

Added:
    hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/benchmarks/RegionScannerBenchmark.java
Modified:
    hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/benchmarks/Benchmark.java
    hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/benchmarks/ScanBenchmark.java

Modified: hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/benchmarks/Benchmark.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/benchmarks/Benchmark.java?rev=1401280&r1=1401279&r2=1401280&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/benchmarks/Benchmark.java (original)
+++ hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/benchmarks/Benchmark.java Tue Oct 23 12:51:38 2012
@@ -6,6 +6,7 @@ import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.client.HBaseAdmin;
 import org.apache.hadoop.hbase.client.HTable;
 import org.apache.hadoop.hbase.loadtest.ColumnFamilyProperties;
 import org.apache.hadoop.hbase.loadtest.HBaseUtils;
@@ -67,7 +68,7 @@ public abstract class Benchmark {
    * @throws IOException 
    */
   public HTable createTableAndLoadData(byte[] tableName, int kvSize, 
-      long numKVs) throws IOException {
+      long numKVs, boolean flushData) throws IOException {
     HTable htable = null;
     try {
       htable = new HTable(conf, tableName);
@@ -95,9 +96,9 @@ public abstract class Benchmark {
     LOG.info("Loading data for the table");
     String[] loadTestToolArgs = {
       "-zk", "localhost", 
-      "-tn", "bench.ScanFromMemoryPerf",
+      "-tn", new String(tableName),
       "-cf", familyProperty.familyName,
-      "-write", "1:50", 
+      "-write", "1:" + kvSize, 
       "-num_keys", "" + numKVs, 
       "-multiput",
       "-compression", "NONE",
@@ -105,6 +106,18 @@ public abstract class Benchmark {
     LoadTestTool.doMain(loadTestToolArgs);
     LOG.info("Done loading data");
     
+    if (flushData) {
+      LOG.info("Flush of data requested");
+      HBaseAdmin admin = new HBaseAdmin(conf);
+      admin.flush(tableName);
+      try {
+        Thread.sleep(2*1000);
+      } catch (InterruptedException e) {
+        e.printStackTrace();
+      }
+      LOG.info("Done flushing data");
+    }
+    
     htable = new HTable(conf, tableName);
     return htable;
   }

Added: hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/benchmarks/RegionScannerBenchmark.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/benchmarks/RegionScannerBenchmark.java?rev=1401280&view=auto
==============================================================================
--- hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/benchmarks/RegionScannerBenchmark.java (added)
+++ hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/benchmarks/RegionScannerBenchmark.java Tue Oct 23 12:51:38 2012
@@ -0,0 +1,155 @@
+package org.apache.hadoop.hbase.benchmarks;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.HTableDescriptor;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.client.Result;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.io.hfile.HFile;
+import org.apache.hadoop.hbase.regionserver.HRegion;
+import org.apache.hadoop.hbase.regionserver.InternalScanner;
+import org.apache.hadoop.hbase.regionserver.wal.HLog;
+import org.apache.hadoop.hbase.util.Bytes;
+
+/**
+ * This test benchmarks the performance of scanning from the block cache when
+ * run as a library, so it does not carry the overhead of reporting metrics,
+ * etc. It measures the theoretical throughput the RegionScanner can achieve,
+ * which serves as an upper bound on scan throughput.
+ */
+public class RegionScannerBenchmark {
+  public static final Log LOG = LogFactory.getLog(RegionScannerBenchmark.class);
+  private Configuration conf;
+  private FileSystem fs;
+  private Path hbaseRootDir = null;
+  private Path oldLogDir;
+  private Path logDir;
+  
+  public RegionScannerBenchmark() throws IOException {
+    conf = HBaseConfiguration.create();
+    fs = FileSystem.get(conf);
+    this.hbaseRootDir = new Path(this.conf.get(HConstants.HBASE_DIR));
+    this.oldLogDir = 
+      new Path(this.hbaseRootDir, HConstants.HREGION_OLDLOGDIR_NAME);
+    this.logDir = new Path(this.hbaseRootDir, HConstants.HREGION_LOGDIR_NAME);
+    
+    reinitialize();
+  }
+  
+  public void reinitialize() throws IOException {
+    if (fs.exists(this.hbaseRootDir)) {
+      fs.delete(this.hbaseRootDir, true);
+    }
+    Path rootdir = fs.makeQualified(new Path(conf.get(HConstants.HBASE_DIR)));
+    fs.mkdirs(rootdir);    
+  }
+  
+  public void runBenchmark() throws Throwable {
+    String tableNameStr = "RegionScannerBenchmark";
+    HRegion region = createAndbulkLoadHRegion(tableNameStr, 50, 1000000);
+    
+    // warm block cache and jvm jit compilation
+    for (int i = 0; i < 20; i++) {
+      scanHRegion(true, region);
+    }
+  }
+
+  public void scanHRegion(boolean printStats, HRegion region) 
+  throws IOException {
+    // create the scan object with the right params
+    Scan scan = new Scan();
+    scan.setMaxVersions(1);
+
+    // create the RegionScanner object
+    InternalScanner scanner = region.getScanner(scan);
+    
+    // do the scan
+    long numKVs = 0;
+    long numBytes = 0;
+    List<KeyValue> results = new ArrayList<KeyValue>();
+    long t1 = System.currentTimeMillis();
+    while (scanner.next(results) || results.size() > 0) {
+      for (KeyValue kv : results) {
+        numKVs++;
+        numBytes += kv.getLength();
+      }
+      results.clear();
+    }
+    long t2 = System.currentTimeMillis();
+    scanner.close();
+    
+    if (printStats) {
+      double numBytesInMB = numBytes * 1.0 / (1024 * 1024);
+      double rate = numBytesInMB * (1000 * 1.0 / (t2 - t1));
+      System.out.println(
+          "Scan: from region scanner" +
+          ", kvs = " + numKVs +
+          ", bytes = " + String.format("%1$,.2f", numBytesInMB) + " MB" +
+          ", time = " + (t2 - t1) + " ms" +
+          ", rate = " + String.format("%1$,.2f", rate) + "MB/s"
+          );
+    }
+  }
+  
+  public HRegion createAndbulkLoadHRegion(String tableNameStr, 
+      int kvSize, int numKVs) throws IOException {
+    // cleanup old data
+    Path basedir = new Path(this.hbaseRootDir, tableNameStr);
+    deleteDir(basedir);
+    
+    // setup the region
+    HLog wal = createWAL(this.conf);
+    HTableDescriptor htd = new HTableDescriptor(tableNameStr);
+    HColumnDescriptor a = new HColumnDescriptor(Bytes.toBytes("a"));
+    htd.addFamily(a);
+    HRegionInfo hri = new HRegionInfo(htd, null, null, false);
+    HRegion region = HRegion.openHRegion(hri, basedir, wal, this.conf);
+
+    // bulk load some data
+    Path f =  new Path(basedir, "hfile");
+    HFile.Writer writer =
+      HFile.getWriterFactoryNoCache(conf).withPath(fs, f).create();
+    byte [] family = 
+      hri.getTableDesc().getFamilies().iterator().next().getName();
+    byte [] row = Bytes.toBytes(tableNameStr);
+    byte [] value = new byte[kvSize];
+    (new Random()).nextBytes(value);
+    for (int i = 0; i < numKVs; i++) {
+      writer.append(new KeyValue(row, family, Bytes.toBytes(i), 
+          System.currentTimeMillis(), value));
+    }
+    writer.close();
+    region.bulkLoadHFile(f.toString(), family);
+    return region;
+  }
+
+  private void deleteDir(final Path p) throws IOException {
+    if (this.fs.exists(p)) {
+      if (!this.fs.delete(p, true)) {
+        throw new IOException("Failed remove of " + p);
+      }
+    }
+  }
+  private HLog createWAL(final Configuration c) throws IOException {
+    HLog wal = new HLog(FileSystem.get(c), logDir, oldLogDir, c, null);
+    return wal;
+  }
+
+  public static void main(String[] args) throws Throwable {
+    RegionScannerBenchmark benchmark = new RegionScannerBenchmark();
+    benchmark.runBenchmark();
+  }
+}

Modified: hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/benchmarks/ScanBenchmark.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/benchmarks/ScanBenchmark.java?rev=1401280&r1=1401279&r2=1401280&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/benchmarks/ScanBenchmark.java (original)
+++ hbase/branches/0.89-fb/src/test/java/org/apache/hadoop/hbase/benchmarks/ScanBenchmark.java Tue Oct 23 12:51:38 2012
@@ -2,26 +2,19 @@ package org.apache.hadoop.hbase.benchmar
 
 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.HashMap;
 import java.util.List;
-import java.util.Map;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.HBaseConfiguration;
 import org.apache.hadoop.hbase.KeyValue;
-import org.apache.hadoop.hbase.TableExistsException;
 import org.apache.hadoop.hbase.client.HTable;
 import org.apache.hadoop.hbase.client.Result;
 import org.apache.hadoop.hbase.client.ResultScanner;
 import org.apache.hadoop.hbase.client.Scan;
-import org.apache.hadoop.hbase.loadtest.ColumnFamilyProperties;
 import org.apache.hadoop.hbase.loadtest.HBaseUtils;
 import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.hadoop.hbase.util.LoadTestTool;
-import org.apache.log4j.Level;
-import org.apache.log4j.Logger;
 
 /**
  * This test compares the performance of scan when all the data is in memory 
@@ -33,12 +26,12 @@ public class ScanBenchmark extends Bench
   private static final long PRINT_INTERVAL_KVS = 1000000;
   private byte[] tableName = Bytes.toBytes("bench.ScanFromMemoryPerf");
   private static Integer[] SET_CACHING_VALUES = { 
-//    1000,  2000,  3000,  4000,
-//    5000,  6000,  7000,  8000, 
+    5000,  6000,  7000,  8000, 
     9000,  10000, 11000, 12000, 
-    13000, 14000, 
+    13000, 14000, 15000, 16000,
+    17000, 18000, 19000, 20000
   };
-  private static Integer[] SET_PREFETCH_VALUES = { 0 };  
+  private static Integer[] SET_PREFETCH_VALUES = { 0 };
   
   public void initBenchmarkResults() {
     List<String> header = new ArrayList<String>();
@@ -52,9 +45,12 @@ public class ScanBenchmark extends Bench
   
   public void runBenchmark() throws Throwable {
     // populate the table
-    createTableAndLoadData(tableName, 50, 1000000);
-    // warm block cache 
-    runExperiment(false, 10000, 0);  
+    createTableAndLoadData(tableName, 50, 1000000, true);
+    // warm block cache, force jit compilation
+    System.out.println("Warming blockcache and forcing JIT compilation...");
+    for (int i = 0; i < 20; i++) {
+      runExperiment(false, 10000, 0);  
+    }
     for (int caching : SET_CACHING_VALUES) {  
       for (int prefetch : SET_PREFETCH_VALUES) {
         try { 
@@ -93,11 +89,15 @@ public class ScanBenchmark extends Bench
 
       if (numKVs > printAfterNumKVs) {
         printAfterNumKVs += PRINT_INTERVAL_KVS;
-        if (printStats) printStats(numKVs, numBytes, startTime, caching, prefetch);
+        if (printStats) {
+          printStats(numKVs, numBytes, startTime, caching, prefetch);
+        }
       }
     }
 
-    if (printStats) printStats(numKVs, numBytes, startTime, caching, prefetch);
+    if (printStats) {
+      printStats(numKVs, numBytes, startTime, caching, prefetch);
+    }
     scanner.close();
   }