You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by ja...@apache.org on 2019/06/28 22:44:18 UTC

[incubator-pinot] branch master updated: Offheap String Dictionary Benchmark (#4381)

This is an automated email from the ASF dual-hosted git repository.

jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 33f583b  Offheap String Dictionary Benchmark (#4381)
33f583b is described below

commit 33f583b2a84a8a22143a26c9cc6b80dc2a81563c
Author: Xiaotian (Jackie) Jiang <17...@users.noreply.github.com>
AuthorDate: Fri Jun 28 15:44:13 2019 -0700

    Offheap String Dictionary Benchmark (#4381)
    
    Enhance BenchmarkStringDictionary to benchmark reads and writes on the on-heap and off-heap mutable dictionaries for strings of different maximum lengths
    
    Benchmark                                               (_maxValueLength)  Mode  Cnt     Score    Error  Units
    BenchmarkStringDictionary.offHeapStringDictionaryRead                   8  avgt    5   183.163 ±  5.602  ms/op
    BenchmarkStringDictionary.offHeapStringDictionaryRead                  16  avgt    5   220.020 ±  4.484  ms/op
    BenchmarkStringDictionary.offHeapStringDictionaryRead                  32  avgt    5   271.777 ±  4.843  ms/op
    BenchmarkStringDictionary.offHeapStringDictionaryRead                  64  avgt    5   344.430 ±  1.899  ms/op
    BenchmarkStringDictionary.offHeapStringDictionaryRead                 128  avgt    5   426.191 ±  2.287  ms/op
    BenchmarkStringDictionary.offHeapStringDictionaryRead                 256  avgt    5   577.688 ±  3.335  ms/op
    BenchmarkStringDictionary.offHeapStringDictionaryRead                 512  avgt    5   763.864 ±  2.344  ms/op
    BenchmarkStringDictionary.offHeapStringDictionaryRead                1024  avgt    5  1168.924 ±  7.968  ms/op
    BenchmarkStringDictionary.offHeapStringDictionaryWrite                  8  avgt    5   199.991 ±  3.149  ms/op
    BenchmarkStringDictionary.offHeapStringDictionaryWrite                 16  avgt    5   236.314 ± 12.376  ms/op
    BenchmarkStringDictionary.offHeapStringDictionaryWrite                 32  avgt    5   282.874 ±  4.792  ms/op
    BenchmarkStringDictionary.offHeapStringDictionaryWrite                 64  avgt    5   354.617 ±  3.147  ms/op
    BenchmarkStringDictionary.offHeapStringDictionaryWrite                128  avgt    5   448.211 ±  2.522  ms/op
    BenchmarkStringDictionary.offHeapStringDictionaryWrite                256  avgt    5   552.417 ±  4.184  ms/op
    BenchmarkStringDictionary.offHeapStringDictionaryWrite                512  avgt    5   757.177 ±  3.845  ms/op
    BenchmarkStringDictionary.offHeapStringDictionaryWrite               1024  avgt    5  1209.502 ±  3.340  ms/op
    BenchmarkStringDictionary.onHeapStringDictionaryRead                    8  avgt    5    72.578 ±  3.250  ms/op
    BenchmarkStringDictionary.onHeapStringDictionaryRead                   16  avgt    5    70.733 ±  1.130  ms/op
    BenchmarkStringDictionary.onHeapStringDictionaryRead                   32  avgt    5    77.652 ±  0.751  ms/op
    BenchmarkStringDictionary.onHeapStringDictionaryRead                   64  avgt    5    53.886 ±  2.062  ms/op
    BenchmarkStringDictionary.onHeapStringDictionaryRead                  128  avgt    5    76.544 ±  1.037  ms/op
    BenchmarkStringDictionary.onHeapStringDictionaryRead                  256  avgt    5    71.787 ±  0.333  ms/op
    BenchmarkStringDictionary.onHeapStringDictionaryRead                  512  avgt    5    78.395 ±  8.629  ms/op
    BenchmarkStringDictionary.onHeapStringDictionaryRead                 1024  avgt    5    83.715 ±  7.494  ms/op
    BenchmarkStringDictionary.onHeapStringDictionaryWrite                   8  avgt    5    87.510 ±  0.799  ms/op
    BenchmarkStringDictionary.onHeapStringDictionaryWrite                  16  avgt    5    88.239 ±  0.331  ms/op
    BenchmarkStringDictionary.onHeapStringDictionaryWrite                  32  avgt    5    95.107 ±  0.845  ms/op
    BenchmarkStringDictionary.onHeapStringDictionaryWrite                  64  avgt    5   103.359 ±  1.934  ms/op
    BenchmarkStringDictionary.onHeapStringDictionaryWrite                 128  avgt    5   108.769 ±  1.125  ms/op
    BenchmarkStringDictionary.onHeapStringDictionaryWrite                 256  avgt    5   119.936 ± 17.283  ms/op
    BenchmarkStringDictionary.onHeapStringDictionaryWrite                 512  avgt    5   126.183 ±  0.595  ms/op
    BenchmarkStringDictionary.onHeapStringDictionaryWrite                1024  avgt    5   130.646 ±  2.861  ms/op
---
 pinot-perf/pom.xml                                 |   4 +-
 .../pinot/perf/BenchmarkStringDictionary.java      | 118 +++++++++++++--------
 2 files changed, 75 insertions(+), 47 deletions(-)

diff --git a/pinot-perf/pom.xml b/pinot-perf/pom.xml
index a37057b..64b1891 100644
--- a/pinot-perf/pom.xml
+++ b/pinot-perf/pom.xml
@@ -98,7 +98,7 @@
     <dependency>
       <groupId>org.openjdk.jmh</groupId>
       <artifactId>jmh-core</artifactId>
-      <version>1.15</version>
+      <version>1.21</version>
       <exclusions>
         <exclusion>
           <groupId>net.sf.jopt-simple</groupId>
@@ -109,7 +109,7 @@
     <dependency>
       <groupId>org.openjdk.jmh</groupId>
       <artifactId>jmh-generator-annprocess</artifactId>
-      <version>1.15</version>
+      <version>1.21</version>
       <scope>provided</scope>
     </dependency>
     <dependency>
diff --git a/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkStringDictionary.java b/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkStringDictionary.java
index ffb295b..3f965bb 100644
--- a/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkStringDictionary.java
+++ b/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkStringDictionary.java
@@ -18,102 +18,130 @@
  */
 package org.apache.pinot.perf;
 
+import java.io.IOException;
 import java.util.Random;
 import java.util.concurrent.TimeUnit;
+import org.apache.pinot.common.utils.StringUtil;
 import org.apache.pinot.core.io.readerwriter.PinotDataBufferMemoryManager;
 import org.apache.pinot.core.io.writer.impl.DirectMemoryManager;
 import org.apache.pinot.core.realtime.impl.dictionary.StringOffHeapMutableDictionary;
 import org.apache.pinot.core.realtime.impl.dictionary.StringOnHeapMutableDictionary;
 import org.openjdk.jmh.annotations.Benchmark;
 import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Measurement;
 import org.openjdk.jmh.annotations.Mode;
 import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
 import org.openjdk.jmh.annotations.Scope;
 import org.openjdk.jmh.annotations.Setup;
 import org.openjdk.jmh.annotations.State;
 import org.openjdk.jmh.annotations.TearDown;
-import org.openjdk.jmh.profile.GCProfiler;
-import org.openjdk.jmh.profile.HotspotMemoryProfiler;
+import org.openjdk.jmh.annotations.Warmup;
 import org.openjdk.jmh.runner.Runner;
 import org.openjdk.jmh.runner.options.ChainedOptionsBuilder;
 import org.openjdk.jmh.runner.options.OptionsBuilder;
-import org.openjdk.jmh.runner.options.TimeValue;
 
 
+@BenchmarkMode(Mode.AverageTime)
+@OutputTimeUnit(TimeUnit.MILLISECONDS)
+@Warmup(iterations = 3, time = 30)
+@Measurement(iterations = 5, time = 30)
+@Fork(1)
 @State(Scope.Benchmark)
 public class BenchmarkStringDictionary {
-  private static final int ROW_COUNT = 2_500_000;
-  private static final int CARDINALITY = 1_000_000;
-  private static final int MAX_STRING_LEN = 32;
+  private static final int NUM_RECORDS = 1_000_000;
+  private static final int CARDINALITY = 200_000;
+  private static final Random RANDOM = new Random();
+
+  @Param({"8", "16", "32", "64", "128", "256", "512", "1024"})
+  private int _maxValueLength;
 
-  private String[] _stringValues;
   private PinotDataBufferMemoryManager _memoryManager;
+  private String[] _values;
+  private StringOffHeapMutableDictionary _offHeapDictionary;
+  private StringOnHeapMutableDictionary _onHeapDictionary;
 
   @Setup
   public void setUp() {
-    _memoryManager = new DirectMemoryManager(BenchmarkStringDictionary.class.getName());
-    // Create a list of values to insert into the hash map
-    String[] uniqueStrings = new String[CARDINALITY];
-    Random r = new Random();
-    for (int i = 0; i < uniqueStrings.length; i++) {
-      uniqueStrings[i] = generateRandomString(r, r.nextInt(MAX_STRING_LEN + 1));
+    _memoryManager = new DirectMemoryManager("");
+    _offHeapDictionary =
+        new StringOffHeapMutableDictionary(CARDINALITY, CARDINALITY / 10, _memoryManager, null, _maxValueLength / 2);
+    _onHeapDictionary = new StringOnHeapMutableDictionary();
+    String[] uniqueValues = new String[CARDINALITY];
+    for (int i = 0; i < CARDINALITY; i++) {
+      String value = generateRandomString(RANDOM.nextInt(_maxValueLength + 1));
+      uniqueValues[i] = value;
+      _offHeapDictionary.index(value);
+      _onHeapDictionary.index(value);
     }
-    _stringValues = new String[ROW_COUNT];
-    for (int i = 0; i < _stringValues.length; i++) {
-      int u = r.nextInt(CARDINALITY);
-      _stringValues[i] = uniqueStrings[u];
+    _values = new String[NUM_RECORDS];
+    for (int i = 0; i < NUM_RECORDS; i++) {
+      _values[i] = uniqueValues[RANDOM.nextInt(CARDINALITY)];
     }
   }
 
   @TearDown
   public void tearDown()
       throws Exception {
+    _onHeapDictionary.close();
+    _offHeapDictionary.close();
     _memoryManager.close();
   }
 
-  // Generates a ascii displayable string of given length
-  private String generateRandomString(Random r, final int len) {
-    byte[] bytes = new byte[len];
-    for (int i = 0; i < len; i++) {
-      bytes[i] = (byte) (r.nextInt(92) + 32);
+  // Generates an ASCII displayable string of the given length
+  private String generateRandomString(int length) {
+    byte[] bytes = new byte[length];
+    for (int i = 0; i < length; i++) {
+      bytes[i] = (byte) (RANDOM.nextInt(0x7F - 0x20) + 0x20);
     }
-    return new String(bytes);
+    return StringUtil.decodeUtf8(bytes);
   }
 
   @Benchmark
-  @BenchmarkMode(Mode.SampleTime)
-  @OutputTimeUnit(TimeUnit.MILLISECONDS)
-  public StringOffHeapMutableDictionary benchmarkOffHeapStringDictionary() {
-    StringOffHeapMutableDictionary dictionary =
-        new StringOffHeapMutableDictionary(5000, 10, _memoryManager, "stringColumn", 32);
-
-    for (String stringValue : _stringValues) {
-      dictionary.index(stringValue);
+  public int offHeapStringDictionaryRead() {
+    int sum = 0;
+    for (String stringValue : _values) {
+      sum += _offHeapDictionary.indexOf(stringValue);
     }
-
-    return dictionary;
+    return sum;
   }
 
   @Benchmark
-  @BenchmarkMode(Mode.SampleTime)
-  @OutputTimeUnit(TimeUnit.MILLISECONDS)
-  public StringOnHeapMutableDictionary benchmarkOnHeapStringDictionary() {
-    StringOnHeapMutableDictionary dictionary = new StringOnHeapMutableDictionary();
+  public int onHeapStringDictionaryRead() {
+    int sum = 0;
+    for (String stringValue : _values) {
+      sum += _onHeapDictionary.indexOf(stringValue);
+    }
+    return sum;
+  }
 
-    for (String stringValue : _stringValues) {
-      dictionary.index(stringValue);
+  @Benchmark
+  public int offHeapStringDictionaryWrite()
+      throws IOException {
+    try (StringOffHeapMutableDictionary offHeapDictionary = new StringOffHeapMutableDictionary(CARDINALITY,
+        CARDINALITY / 10, _memoryManager, null, _maxValueLength / 2)) {
+      for (String stringValue : _values) {
+        offHeapDictionary.index(stringValue);
+      }
+      return offHeapDictionary.length();
     }
+  }
 
-    return dictionary;
+  @Benchmark
+  public int onHeapStringDictionaryWrite()
+      throws IOException {
+    try (StringOnHeapMutableDictionary onHeapDictionary = new StringOnHeapMutableDictionary()) {
+      for (String stringValue : _values) {
+        onHeapDictionary.index(stringValue);
+      }
+      return onHeapDictionary.length();
+    }
   }
 
   public static void main(String[] args)
       throws Exception {
-    ChainedOptionsBuilder opt =
-        new OptionsBuilder().include(BenchmarkStringDictionary.class.getSimpleName()).addProfiler(GCProfiler.class)
-            .addProfiler(HotspotMemoryProfiler.class).warmupTime(TimeValue.seconds(60)).warmupIterations(8)
-            .measurementTime(TimeValue.seconds(60)).measurementIterations(8).forks(5);
-
+    ChainedOptionsBuilder opt = new OptionsBuilder().include(BenchmarkStringDictionary.class.getSimpleName());
     new Runner(opt.build()).run();
   }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org