You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by st...@apache.org on 2014/06/14 23:09:28 UTC

git commit: HBASE-11350 [PE] Allow random value size

Repository: hbase
Updated Branches:
  refs/heads/master 58549428a -> 63f0dffdb


HBASE-11350 [PE] Allow random value size


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/63f0dffd
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/63f0dffd
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/63f0dffd

Branch: refs/heads/master
Commit: 63f0dffdba6b9e70fc26a653dd8b1ea64a4e2b70
Parents: 5854942
Author: Michael Stack <st...@apache.org>
Authored: Sat Jun 14 17:08:56 2014 -0400
Committer: Michael Stack <st...@apache.org>
Committed: Sat Jun 14 17:09:18 2014 -0400

----------------------------------------------------------------------
 .../hadoop/hbase/PerformanceEvaluation.java     | 179 ++++++++++++++-----
 .../hbase/mapreduce/TestHFileOutputFormat.java  |   3 +-
 .../hbase/mapreduce/TestHFileOutputFormat2.java |   3 +-
 3 files changed, 143 insertions(+), 42 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/63f0dffd/hbase-server/src/test/java/org/apache/hadoop/hbase/PerformanceEvaluation.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/PerformanceEvaluation.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/PerformanceEvaluation.java
index e9e2310..c647e4b 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/PerformanceEvaluation.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/PerformanceEvaluation.java
@@ -86,6 +86,7 @@ import com.google.common.util.concurrent.ThreadFactoryBuilder;
 import com.yammer.metrics.core.Histogram;
 import com.yammer.metrics.stats.UniformSample;
 import com.yammer.metrics.stats.Snapshot;
+
 import org.htrace.Sampler;
 import org.htrace.Trace;
 import org.htrace.TraceScope;
@@ -113,11 +114,11 @@ public class PerformanceEvaluation extends Configured implements Tool {
   public static final String TABLE_NAME = "TestTable";
   public static final byte[] FAMILY_NAME = Bytes.toBytes("info");
   public static final byte[] QUALIFIER_NAME = Bytes.toBytes("data");
-  public static final int VALUE_LENGTH = 1000;
+  public static final int DEFAULT_VALUE_LENGTH = 1000;
   public static final int ROW_LENGTH = 26;
 
   private static final int ONE_GB = 1024 * 1024 * 1000;
-  private static final int ROWS_PER_GB = ONE_GB / VALUE_LENGTH;
+  private static final int DEFAULT_ROWS_PER_GB = ONE_GB / DEFAULT_VALUE_LENGTH;
   // TODO : should we make this configurable
   private static final int TAG_LENGTH = 256;
   private static final DecimalFormat FMT = new DecimalFormat("0.##");
@@ -509,15 +510,18 @@ public class PerformanceEvaluation extends Configured implements Tool {
       this.compression = that.compression;
       this.blockEncoding = that.blockEncoding;
       this.filterAll = that.filterAll;
+      this.valueRandom = that.valueRandom;
+      this.valueSize = that.valueSize;
+      this.period = that.period;
     }
 
     public boolean nomapred = false;
     public boolean filterAll = false;
     public int startRow = 0;
     public float size = 1.0f;
-    public int perClientRunRows = ROWS_PER_GB;
+    public int perClientRunRows = DEFAULT_ROWS_PER_GB;
     public int numClientThreads = 1;
-    public int totalRows = ROWS_PER_GB;
+    public int totalRows = DEFAULT_ROWS_PER_GB;
     public float sampleRate = 1.0f;
     public double traceRate = 0.0;
     public String tableName = TABLE_NAME;
@@ -532,6 +536,9 @@ public class PerformanceEvaluation extends Configured implements Tool {
     public int presplitRegions = 0;
     public Compression.Algorithm compression = Compression.Algorithm.NONE;
     public DataBlockEncoding blockEncoding = DataBlockEncoding.NONE;
+    public boolean valueRandom = false;
+    public int valueSize = DEFAULT_VALUE_LENGTH;
+    public int period = (this.perClientRunRows / 10) == 0? perClientRunRows: perClientRunRows / 10;
   }
 
   /*
@@ -560,6 +567,7 @@ public class PerformanceEvaluation extends Configured implements Tool {
 
     private String testName;
     private Histogram latency;
+    private Histogram valueSize;
 
     /**
      * Note that all subclasses of this class must provide a public contructor
@@ -582,13 +590,40 @@ public class PerformanceEvaluation extends Configured implements Tool {
       LOG.info("Sampling 1 every " + everyN + " out of " + opts.perClientRunRows + " total rows.");
     }
 
-    private String generateStatus(final int sr, final int i, final int lr) {
-      return sr + "/" + i + "/" + lr + " " + getShortLatencyReport();
+    int getValueLength(final Random r) {
+      return opts.valueRandom? Math.abs(r.nextInt() % opts.valueSize): opts.valueSize;
+    }
+
+    void updateValueSize(final Result [] rs) throws IOException {
+      if (rs == null || !isRandomValueSize()) return;
+      for (Result r: rs) updateValueSize(r);
+    }
+ 
+    void updateValueSize(final Result r) throws IOException {
+      if (r == null || !isRandomValueSize()) return;
+      int size = 0;
+      for (CellScanner scanner = r.cellScanner(); scanner.advance();) {
+        size += scanner.current().getValueLength();
+      }
+      updateValueSize(size);
+    }
+
+    void updateValueSize(final int valueSize) {
+      if (!isRandomValueSize()) return;
+      this.valueSize.update(valueSize);
+    }
+
+    String generateStatus(final int sr, final int i, final int lr) {
+      return sr + "/" + i + "/" + lr + ", latency " + getShortLatencyReport() +
+        (!isRandomValueSize()? "": ", value size " + getShortValueSizeReport());
+    }
+ 
+    boolean isRandomValueSize() {
+      return opts.valueRandom;
     }
 
     protected int getReportingPeriod() {
-      int period = opts.perClientRunRows / 10;
-      return period == 0 ? opts.perClientRunRows : period;
+      return opts.period;
     }
 
     void testSetup() throws IOException {
@@ -601,6 +636,7 @@ public class PerformanceEvaluation extends Configured implements Tool {
             Histogram.class.getDeclaredConstructor(com.yammer.metrics.stats.Sample.class);
         ctor.setAccessible(true);
         latency = (Histogram) ctor.newInstance(new UniformSample(1024 * 500));
+        valueSize = (Histogram) ctor.newInstance(new UniformSample(1024 * 500));
       } catch (Exception e) {
         throw new RuntimeException(e);
       }
@@ -609,6 +645,7 @@ public class PerformanceEvaluation extends Configured implements Tool {
 
     void testTakedown() throws IOException {
       reportLatency();
+      reportValueSize();
       if (opts.flushCommits) {
         this.table.flushCommits();
       }
@@ -662,17 +699,27 @@ public class PerformanceEvaluation extends Configured implements Tool {
     private void reportLatency() throws IOException {
       status.setStatus(testName + " latency log (microseconds), on " +
           latency.count() + " measures");
-      Snapshot sn = latency.getSnapshot();
-      status.setStatus(testName + " Min      = " + latency.min());
-      status.setStatus(testName + " Avg      = " + latency.mean());
-      status.setStatus(testName + " StdDev   = " + latency.stdDev());
+      reportHistogram(this.latency);
+    }
+
+    private void reportValueSize() throws IOException {
+      status.setStatus(testName + " valueSize after " +
+          valueSize.count() + " measures");
+      reportHistogram(this.valueSize);
+    }
+ 
+    private void reportHistogram(final Histogram h) throws IOException {
+      Snapshot sn = h.getSnapshot();
+      status.setStatus(testName + " Min      = " + h.min());
+      status.setStatus(testName + " Avg      = " + h.mean());
+      status.setStatus(testName + " StdDev   = " + h.stdDev());
       status.setStatus(testName + " 50th     = " + sn.getMedian());
       status.setStatus(testName + " 95th     = " + sn.get95thPercentile());
       status.setStatus(testName + " 99th     = " + sn.get99thPercentile());
       status.setStatus(testName + " 99.9th   = " + sn.get999thPercentile());
       status.setStatus(testName + " 99.99th  = " + sn.getValue(0.9999));
       status.setStatus(testName + " 99.999th = " + sn.getValue(0.99999));
-      status.setStatus(testName + " Max      = " + latency.max());
+      status.setStatus(testName + " Max      = " + h.max());
     }
 
     /**
@@ -684,9 +731,22 @@ public class PerformanceEvaluation extends Configured implements Tool {
      * @return Subset of the histograms' calculation.
      */
     private String getShortLatencyReport() {
-      Snapshot sn = latency.getSnapshot();
-      return "Mean=" + DOUBLE_FORMAT.format(latency.mean()) +
-        ", StdDev=" + DOUBLE_FORMAT.format(latency.stdDev()) +
+      return getShortHistogramReport(this.latency);
+    }
+
+    /**
+     * @return Subset of the histograms' calculation.
+     */
+    private String getShortValueSizeReport() {
+      return getShortHistogramReport(this.valueSize);
+    }
+
+    private String getShortHistogramReport(final Histogram h) {
+      Snapshot sn = h.getSnapshot();
+      return "mean=" + DOUBLE_FORMAT.format(h.mean()) +
+        ", min=" + DOUBLE_FORMAT.format(h.min()) +
+        ", max=" + DOUBLE_FORMAT.format(h.max()) +
+        ", stdDev=" + DOUBLE_FORMAT.format(h.stdDev()) +
         ", 95th=" + DOUBLE_FORMAT.format(sn.get95thPercentile()) +
         ", 99th=" + DOUBLE_FORMAT.format(sn.get99thPercentile());
     }
@@ -716,7 +776,9 @@ public class PerformanceEvaluation extends Configured implements Tool {
       list.addFilter(new WhileMatchFilter(new PageFilter(120)));
       scan.setFilter(list);
       ResultScanner s = this.table.getScanner(scan);
-      for (Result rr; (rr = s.next()) != null;) ;
+      for (Result rr; (rr = s.next()) != null;) {
+        updateValueSize(rr);
+      }
       s.close();
     }
 
@@ -741,12 +803,13 @@ public class PerformanceEvaluation extends Configured implements Tool {
         scan.setFilter(new FilterAllFilter());
       }
       scan.addColumn(FAMILY_NAME, QUALIFIER_NAME);
-      ResultScanner s = this.table.getScanner(scan);
+      Result r = null;
       int count = 0;
-      while (s.next() != null) {
+      ResultScanner s = this.table.getScanner(scan);
+      for (; (r = s.next()) != null;) {
+        updateValueSize(r);;
         count++;
       }
-
       if (i % 100 == 0) {
         LOG.info(String.format("Scan for key range %s - %s returned %s rows",
             Bytes.toString(startAndStopRow.getFirst()),
@@ -837,17 +900,18 @@ public class PerformanceEvaluation extends Configured implements Tool {
       if (opts.multiGet > 0) {
         this.gets.add(get);
         if (this.gets.size() == opts.multiGet) {
-          this.table.get(this.gets);
+          Result [] rs = this.table.get(this.gets);
+          updateValueSize(rs);
           this.gets.clear();
         }
       } else {
-        this.table.get(get);
+        updateValueSize(this.table.get(get));
       }
     }
 
     @Override
     protected int getReportingPeriod() {
-      int period = opts.perClientRunRows / 100;
+      int period = opts.perClientRunRows / 10;
       return period == 0 ? opts.perClientRunRows : period;
     }
 
@@ -870,7 +934,7 @@ public class PerformanceEvaluation extends Configured implements Tool {
     void testRow(final int i) throws IOException {
       byte[] row = getRandomRow(this.rand, opts.totalRows);
       Put put = new Put(row);
-      byte[] value = generateData(this.rand, VALUE_LENGTH);
+      byte[] value = generateData(this.rand, getValueLength(this.rand));
       if (opts.useTags) {
         byte[] tag = generateData(this.rand, TAG_LENGTH);
         Tag[] tags = new Tag[opts.noOfTags];
@@ -881,15 +945,16 @@ public class PerformanceEvaluation extends Configured implements Tool {
         KeyValue kv = new KeyValue(row, FAMILY_NAME, QUALIFIER_NAME, HConstants.LATEST_TIMESTAMP,
             value, tags);
         put.add(kv);
+        updateValueSize(kv.getValueLength());
       } else {
         put.add(FAMILY_NAME, QUALIFIER_NAME, value);
+        updateValueSize(value.length);
       }
       put.setDurability(opts.writeToWAL ? Durability.SYNC_WAL : Durability.SKIP_WAL);
       table.put(put);
     }
   }
 
-
   static class ScanTest extends Test {
     private ResultScanner testScanner;
 
@@ -917,7 +982,8 @@ public class PerformanceEvaluation extends Configured implements Tool {
         }
        this.testScanner = table.getScanner(scan);
       }
-      testScanner.next();
+      Result r = testScanner.next();
+      updateValueSize(r);
     }
 
   }
@@ -934,7 +1000,7 @@ public class PerformanceEvaluation extends Configured implements Tool {
       if (opts.filterAll) {
         get.setFilter(new FilterAllFilter());
       }
-      table.get(get);
+      updateValueSize(table.get(get));
     }
   }
 
@@ -947,7 +1013,7 @@ public class PerformanceEvaluation extends Configured implements Tool {
     void testRow(final int i) throws IOException {
       byte[] row = format(i);
       Put put = new Put(row);
-      byte[] value = generateData(this.rand, VALUE_LENGTH);
+      byte[] value = generateData(this.rand, getValueLength(this.rand));
       if (opts.useTags) {
         byte[] tag = generateData(this.rand, TAG_LENGTH);
         Tag[] tags = new Tag[opts.noOfTags];
@@ -958,8 +1024,10 @@ public class PerformanceEvaluation extends Configured implements Tool {
         KeyValue kv = new KeyValue(row, FAMILY_NAME, QUALIFIER_NAME, HConstants.LATEST_TIMESTAMP,
             value, tags);
         put.add(kv);
+        updateValueSize(kv.getValueLength());
       } else {
         put.add(FAMILY_NAME, QUALIFIER_NAME, value);
+        updateValueSize(value.length);
       }
       put.setDurability(opts.writeToWAL ? Durability.SYNC_WAL : Durability.SKIP_WAL);
       table.put(put);
@@ -975,12 +1043,13 @@ public class PerformanceEvaluation extends Configured implements Tool {
 
     @Override
     void testRow(int i) throws IOException {
-      byte[] value = generateData(this.rand, VALUE_LENGTH);
+      byte[] value = generateData(this.rand, getValueLength(this.rand));
       Scan scan = constructScan(value);
       ResultScanner scanner = null;
       try {
         scanner = this.table.getScanner(scan);
-        while (scanner.next() != null) {
+        for (Result r = null; (r = scanner.next()) != null;) {
+          updateValueSize(r);
         }
       } finally {
         if (scanner != null) scanner.close();
@@ -1010,11 +1079,11 @@ public class PerformanceEvaluation extends Configured implements Tool {
    * @param timeMs Time taken in milliseconds.
    * @return String value with label, ie '123.76 MB/s'
    */
-  private static String calculateMbps(int rows, long timeMs) {
+  private static String calculateMbps(int rows, long timeMs, final int valueSize) {
     // MB/s = ((totalRows * ROW_SIZE_BYTES) / totalTimeMS)
     //        * 1000 MS_PER_SEC / (1024 * 1024) BYTES_PER_MB
     BigDecimal rowSize =
-      BigDecimal.valueOf(ROW_LENGTH + VALUE_LENGTH + FAMILY_NAME.length + QUALIFIER_NAME.length);
+      BigDecimal.valueOf(ROW_LENGTH + valueSize + FAMILY_NAME.length + QUALIFIER_NAME.length);
     BigDecimal mbps = BigDecimal.valueOf(rows).multiply(rowSize, CXT)
       .divide(BigDecimal.valueOf(timeMs), CXT).multiply(MS_PER_SEC, CXT)
       .divide(BYTES_PER_MB, CXT);
@@ -1071,7 +1140,7 @@ public class PerformanceEvaluation extends Configured implements Tool {
    */
   @Deprecated
   public static byte[] generateValue(final Random r) {
-    return generateData(r, VALUE_LENGTH);
+    return generateData(r, DEFAULT_VALUE_LENGTH);
   }
 
   static byte [] getRandomRow(final Random random, final int totalRows) {
@@ -1102,10 +1171,15 @@ public class PerformanceEvaluation extends Configured implements Tool {
 
     status.setStatus("Finished " + cmd + " in " + totalElapsedTime +
       "ms at offset " + opts.startRow + " for " + opts.perClientRunRows + " rows" +
-      " (" + calculateMbps((int)(opts.perClientRunRows * opts.sampleRate), totalElapsedTime) + ")");
+      " (" + calculateMbps((int)(opts.perClientRunRows * opts.sampleRate), totalElapsedTime,
+          getAverageValueLength(opts)) + ")");
     return totalElapsedTime;
   }
 
+  private static int getAverageValueLength(final TestOptions opts) {
+    return opts.valueRandom? opts.valueSize/2: opts.valueSize;
+  }
+
   private void runTest(final Class<? extends Test> cmd, TestOptions opts) throws IOException,
       InterruptedException, ClassNotFoundException {
     HBaseAdmin admin = null;
@@ -1131,9 +1205,7 @@ public class PerformanceEvaluation extends Configured implements Tool {
       System.err.println(message);
     }
     System.err.println("Usage: java " + this.getClass().getName() + " \\");
-    System.err.println("  [--nomapred] [--rows=ROWS] [--table=NAME] \\");
-    System.err.println("  [--compress=TYPE] [--blockEncoding=TYPE] " +
-      "[-D<property=value>]* <command> <nclients>");
+    System.err.println("  <OPTIONS> [-D<property=value>]* <command> <nclients>");
     System.err.println();
     System.err.println("Options:");
     System.err.println(" nomapred        Run multiple clients using threads " +
@@ -1166,6 +1238,11 @@ public class PerformanceEvaluation extends Configured implements Tool {
         + " there by not returning any thing back to the client.  Helps to check the server side"
         + " performance.  Uses FilterAllFilter internally. ");
     System.err.println(" latency         Set to report operation latencies. Default: False");
+    System.err.println(" valueSize       Pass value size to use: Default: 1024");
+    System.err.println(" valueRandom     Set if we should vary value size between 0 and " +
+        "'valueSize': Default: Not set.");
+    System.err.println(" period          Report every 'period' rows: " +
+      "Default: opts.perClientRunRows / 10");
     System.err.println();
     System.err.println(" Note: -D properties will be applied to the conf used. ");
     System.err.println("  For example: ");
@@ -1332,21 +1409,43 @@ public class PerformanceEvaluation extends Configured implements Tool {
           continue;
         }
 
+        final String valueSize = "--valueSize=";
+        if (cmd.startsWith(valueSize)) {
+          opts.valueSize = Integer.parseInt(cmd.substring(valueSize.length()));
+          continue;
+        }
+
+        final String valueRandom = "--valueRandom";
+        if (cmd.startsWith(valueRandom)) {
+          opts.valueRandom = true;
+          continue;
+        }
+
+        final String period = "--period=";
+        if (cmd.startsWith(period)) {
+          opts.period = Integer.parseInt(cmd.substring(period.length()));
+          continue;
+        }
+
         Class<? extends Test> cmdClass = determineCommandClass(cmd);
         if (cmdClass != null) {
           opts.numClientThreads = getNumClients(i + 1, args);
           if (opts.size != DEFAULT_OPTS.size &&
             opts.perClientRunRows != DEFAULT_OPTS.perClientRunRows) {
-            throw new IllegalArgumentException(rows + " and " + size + " are mutually exclusive arguments.");
+            throw new IllegalArgumentException(rows + " and " + size +
+              " are mutually exclusive arguments.");
           }
+          // Calculate how many rows per gig.  If random value size presume that that half the max
+          // is average row size.
+          int rowsPerGB = ONE_GB / (opts.valueRandom? opts.valueSize/2: opts.valueSize);
           if (opts.size != DEFAULT_OPTS.size) {
             // total size in GB specified
-            opts.totalRows = (int) opts.size * ROWS_PER_GB;
+            opts.totalRows = (int) opts.size * rowsPerGB;
             opts.perClientRunRows = opts.totalRows / opts.numClientThreads;
           } else if (opts.perClientRunRows != DEFAULT_OPTS.perClientRunRows) {
             // number of rows specified
             opts.totalRows = opts.perClientRunRows * opts.numClientThreads;
-            opts.size = opts.totalRows / ROWS_PER_GB;
+            opts.size = opts.totalRows / rowsPerGB;
           }
           runTest(cmdClass, opts);
           errCode = 0;

http://git-wip-us.apache.org/repos/asf/hbase/blob/63f0dffd/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHFileOutputFormat.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHFileOutputFormat.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHFileOutputFormat.java
index f259352..a46660e 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHFileOutputFormat.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHFileOutputFormat.java
@@ -345,7 +345,8 @@ public class TestHFileOutputFormat  {
     // first region start key is always empty
     ret[0] = HConstants.EMPTY_BYTE_ARRAY;
     for (int i = 1; i < numKeys; i++) {
-      ret[i] = PerformanceEvaluation.generateData(random, PerformanceEvaluation.VALUE_LENGTH);
+      ret[i] =
+        PerformanceEvaluation.generateData(random, PerformanceEvaluation.DEFAULT_VALUE_LENGTH);
     }
     return ret;
   }

http://git-wip-us.apache.org/repos/asf/hbase/blob/63f0dffd/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHFileOutputFormat2.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHFileOutputFormat2.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHFileOutputFormat2.java
index 0485ac5..76db299 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHFileOutputFormat2.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/mapreduce/TestHFileOutputFormat2.java
@@ -344,7 +344,8 @@ public class TestHFileOutputFormat2  {
     // first region start key is always empty
     ret[0] = HConstants.EMPTY_BYTE_ARRAY;
     for (int i = 1; i < numKeys; i++) {
-      ret[i] = PerformanceEvaluation.generateData(random, PerformanceEvaluation.VALUE_LENGTH);
+      ret[i] =
+        PerformanceEvaluation.generateData(random, PerformanceEvaluation.DEFAULT_VALUE_LENGTH);
     }
     return ret;
   }