You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by ja...@apache.org on 2019/06/28 05:01:18 UTC

[incubator-pinot] 01/01: Offheap String Dictionary Benchmark

This is an automated email from the ASF dual-hosted git repository.

jackie pushed a commit to branch off-heap-bench
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git

commit 57fc6ccb66db88acf53e0f4157dbb51e3cbd43c8
Author: Jackie (Xiaotian) Jiang <xa...@linkedin.com>
AuthorDate: Thu Jun 27 21:58:45 2019 -0700

    Offheap String Dictionary Benchmark
    
    Enhance BenchmarkStringDictionary to benchmark reads and writes for on-heap and off-heap mutable dictionaries with strings of different lengths
    
    Benchmark                                               (_maxValueLength)  Mode  Cnt     Score     Error  Units
    BenchmarkStringDictionary.offHeapStringDictionaryRead                   8  avgt    5   198.729 ±  82.008  ms/op
    BenchmarkStringDictionary.offHeapStringDictionaryRead                  16  avgt    5   224.921 ±  32.696  ms/op
    BenchmarkStringDictionary.offHeapStringDictionaryRead                  32  avgt    5   342.644 ± 209.424  ms/op
    BenchmarkStringDictionary.offHeapStringDictionaryRead                  64  avgt    5   360.047 ± 101.929  ms/op
    BenchmarkStringDictionary.offHeapStringDictionaryRead                 128  avgt    5   484.173 ± 113.249  ms/op
    BenchmarkStringDictionary.offHeapStringDictionaryRead                 256  avgt    5   611.015 ± 316.933  ms/op
    BenchmarkStringDictionary.offHeapStringDictionaryRead                 512  avgt    5   813.054 ±  80.827  ms/op
    BenchmarkStringDictionary.offHeapStringDictionaryRead                1024  avgt    5  1178.064 ±  35.517  ms/op
    BenchmarkStringDictionary.offHeapStringDictionaryWrite                  8  avgt    5   210.411 ±  75.758  ms/op
    BenchmarkStringDictionary.offHeapStringDictionaryWrite                 16  avgt    5   242.633 ±  48.765  ms/op
    BenchmarkStringDictionary.offHeapStringDictionaryWrite                 32  avgt    5   296.329 ±  70.063  ms/op
    BenchmarkStringDictionary.offHeapStringDictionaryWrite                 64  avgt    5   373.348 ±  72.213  ms/op
    BenchmarkStringDictionary.offHeapStringDictionaryWrite                128  avgt    5   476.224 ± 103.509  ms/op
    BenchmarkStringDictionary.offHeapStringDictionaryWrite                256  avgt    5   594.346 ± 249.458  ms/op
    BenchmarkStringDictionary.offHeapStringDictionaryWrite                512  avgt    5   841.739 ±  74.731  ms/op
    BenchmarkStringDictionary.offHeapStringDictionaryWrite               1024  avgt    5  1241.271 ±  54.812  ms/op
    BenchmarkStringDictionary.onHeapStringDictionaryRead                    8  avgt    5    69.934 ±   6.722  ms/op
    BenchmarkStringDictionary.onHeapStringDictionaryRead                   16  avgt    5    67.843 ±  10.101  ms/op
    BenchmarkStringDictionary.onHeapStringDictionaryRead                   32  avgt    5    75.276 ±   3.305  ms/op
    BenchmarkStringDictionary.onHeapStringDictionaryRead                   64  avgt    5    54.462 ±   7.237  ms/op
    BenchmarkStringDictionary.onHeapStringDictionaryRead                  128  avgt    5    67.534 ±   7.629  ms/op
    BenchmarkStringDictionary.onHeapStringDictionaryRead                  256  avgt    5    64.138 ±  20.597  ms/op
    BenchmarkStringDictionary.onHeapStringDictionaryRead                  512  avgt    5    74.625 ±   1.658  ms/op
    BenchmarkStringDictionary.onHeapStringDictionaryRead                 1024  avgt    5    73.748 ±  23.128  ms/op
    BenchmarkStringDictionary.onHeapStringDictionaryWrite                   8  avgt    5    90.023 ±  12.112  ms/op
    BenchmarkStringDictionary.onHeapStringDictionaryWrite                  16  avgt    5    94.689 ±  15.384  ms/op
    BenchmarkStringDictionary.onHeapStringDictionaryWrite                  32  avgt    5   102.102 ±  14.500  ms/op
    BenchmarkStringDictionary.onHeapStringDictionaryWrite                  64  avgt    5   108.548 ±  17.029  ms/op
    BenchmarkStringDictionary.onHeapStringDictionaryWrite                 128  avgt    5   110.598 ±  13.267  ms/op
    BenchmarkStringDictionary.onHeapStringDictionaryWrite                 256  avgt    5   118.573 ±  16.774  ms/op
    BenchmarkStringDictionary.onHeapStringDictionaryWrite                 512  avgt    5   125.380 ±  41.136  ms/op
    BenchmarkStringDictionary.onHeapStringDictionaryWrite                1024  avgt    5   130.772 ±  47.184  ms/op
---
 pinot-perf/pom.xml                                 |   4 +-
 .../pinot/perf/BenchmarkStringDictionary.java      | 118 +++++++++++++--------
 2 files changed, 75 insertions(+), 47 deletions(-)

diff --git a/pinot-perf/pom.xml b/pinot-perf/pom.xml
index a37057b..64b1891 100644
--- a/pinot-perf/pom.xml
+++ b/pinot-perf/pom.xml
@@ -98,7 +98,7 @@
     <dependency>
       <groupId>org.openjdk.jmh</groupId>
       <artifactId>jmh-core</artifactId>
-      <version>1.15</version>
+      <version>1.21</version>
       <exclusions>
         <exclusion>
           <groupId>net.sf.jopt-simple</groupId>
@@ -109,7 +109,7 @@
     <dependency>
       <groupId>org.openjdk.jmh</groupId>
       <artifactId>jmh-generator-annprocess</artifactId>
-      <version>1.15</version>
+      <version>1.21</version>
       <scope>provided</scope>
     </dependency>
     <dependency>
diff --git a/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkStringDictionary.java b/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkStringDictionary.java
index ffb295b..872ffcb 100644
--- a/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkStringDictionary.java
+++ b/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkStringDictionary.java
@@ -18,102 +18,130 @@
  */
 package org.apache.pinot.perf;
 
+import java.io.IOException;
 import java.util.Random;
 import java.util.concurrent.TimeUnit;
+import org.apache.pinot.common.utils.StringUtil;
 import org.apache.pinot.core.io.readerwriter.PinotDataBufferMemoryManager;
 import org.apache.pinot.core.io.writer.impl.DirectMemoryManager;
 import org.apache.pinot.core.realtime.impl.dictionary.StringOffHeapMutableDictionary;
 import org.apache.pinot.core.realtime.impl.dictionary.StringOnHeapMutableDictionary;
 import org.openjdk.jmh.annotations.Benchmark;
 import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Measurement;
 import org.openjdk.jmh.annotations.Mode;
 import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
 import org.openjdk.jmh.annotations.Scope;
 import org.openjdk.jmh.annotations.Setup;
 import org.openjdk.jmh.annotations.State;
 import org.openjdk.jmh.annotations.TearDown;
-import org.openjdk.jmh.profile.GCProfiler;
-import org.openjdk.jmh.profile.HotspotMemoryProfiler;
+import org.openjdk.jmh.annotations.Warmup;
 import org.openjdk.jmh.runner.Runner;
 import org.openjdk.jmh.runner.options.ChainedOptionsBuilder;
 import org.openjdk.jmh.runner.options.OptionsBuilder;
-import org.openjdk.jmh.runner.options.TimeValue;
 
 
+@BenchmarkMode(Mode.AverageTime)
+@OutputTimeUnit(TimeUnit.MILLISECONDS)
+@Warmup(iterations = 3, time = 3)
+@Measurement(iterations = 5, time = 3)
+@Fork(1)
 @State(Scope.Benchmark)
 public class BenchmarkStringDictionary {
-  private static final int ROW_COUNT = 2_500_000;
-  private static final int CARDINALITY = 1_000_000;
-  private static final int MAX_STRING_LEN = 32;
+  private static final int NUM_RECORDS = 1_000_000;
+  private static final int CARDINALITY = 200_000;
+  private static final Random RANDOM = new Random();
+
+  @Param({"8", "16", "32", "64", "128", "256", "512", "1024"})
+  private int _maxValueLength;
 
-  private String[] _stringValues;
   private PinotDataBufferMemoryManager _memoryManager;
+  private String[] _values;
+  private StringOffHeapMutableDictionary _offHeapDictionary;
+  private StringOnHeapMutableDictionary _onHeapDictionary;
 
   @Setup
   public void setUp() {
-    _memoryManager = new DirectMemoryManager(BenchmarkStringDictionary.class.getName());
-    // Create a list of values to insert into the hash map
-    String[] uniqueStrings = new String[CARDINALITY];
-    Random r = new Random();
-    for (int i = 0; i < uniqueStrings.length; i++) {
-      uniqueStrings[i] = generateRandomString(r, r.nextInt(MAX_STRING_LEN + 1));
+    _memoryManager = new DirectMemoryManager("");
+    _offHeapDictionary =
+        new StringOffHeapMutableDictionary(CARDINALITY, CARDINALITY / 10, _memoryManager, null, _maxValueLength / 2);
+    _onHeapDictionary = new StringOnHeapMutableDictionary();
+    String[] uniqueValues = new String[CARDINALITY];
+    for (int i = 0; i < CARDINALITY; i++) {
+      String value = generateRandomString(RANDOM.nextInt(_maxValueLength + 1));
+      uniqueValues[i] = value;
+      _offHeapDictionary.index(value);
+      _onHeapDictionary.index(value);
     }
-    _stringValues = new String[ROW_COUNT];
-    for (int i = 0; i < _stringValues.length; i++) {
-      int u = r.nextInt(CARDINALITY);
-      _stringValues[i] = uniqueStrings[u];
+    _values = new String[NUM_RECORDS];
+    for (int i = 0; i < NUM_RECORDS; i++) {
+      _values[i] = uniqueValues[RANDOM.nextInt(CARDINALITY)];
     }
   }
 
   @TearDown
   public void tearDown()
       throws Exception {
+    _onHeapDictionary.close();
+    _offHeapDictionary.close();
     _memoryManager.close();
   }
 
-  // Generates a ascii displayable string of given length
-  private String generateRandomString(Random r, final int len) {
-    byte[] bytes = new byte[len];
-    for (int i = 0; i < len; i++) {
-      bytes[i] = (byte) (r.nextInt(92) + 32);
+  // Generates a ascii displayable string of the given length
+  private String generateRandomString(int length) {
+    byte[] bytes = new byte[length];
+    for (int i = 0; i < length; i++) {
+      bytes[i] = (byte) (RANDOM.nextInt(0x7F - 0x20) + 0x20);
     }
-    return new String(bytes);
+    return StringUtil.decodeUtf8(bytes);
   }
 
   @Benchmark
-  @BenchmarkMode(Mode.SampleTime)
-  @OutputTimeUnit(TimeUnit.MILLISECONDS)
-  public StringOffHeapMutableDictionary benchmarkOffHeapStringDictionary() {
-    StringOffHeapMutableDictionary dictionary =
-        new StringOffHeapMutableDictionary(5000, 10, _memoryManager, "stringColumn", 32);
-
-    for (String stringValue : _stringValues) {
-      dictionary.index(stringValue);
+  public int offHeapStringDictionaryRead() {
+    int sum = 0;
+    for (String stringValue : _values) {
+      sum += _offHeapDictionary.indexOf(stringValue);
     }
-
-    return dictionary;
+    return sum;
   }
 
   @Benchmark
-  @BenchmarkMode(Mode.SampleTime)
-  @OutputTimeUnit(TimeUnit.MILLISECONDS)
-  public StringOnHeapMutableDictionary benchmarkOnHeapStringDictionary() {
-    StringOnHeapMutableDictionary dictionary = new StringOnHeapMutableDictionary();
+  public int onHeapStringDictionaryRead() {
+    int sum = 0;
+    for (String stringValue : _values) {
+      sum += _onHeapDictionary.indexOf(stringValue);
+    }
+    return sum;
+  }
 
-    for (String stringValue : _stringValues) {
-      dictionary.index(stringValue);
+  @Benchmark
+  public int offHeapStringDictionaryWrite()
+      throws IOException {
+    try (StringOffHeapMutableDictionary offHeapDictionary = new StringOffHeapMutableDictionary(CARDINALITY,
+        CARDINALITY / 10, _memoryManager, null, _maxValueLength / 2)) {
+      for (String stringValue : _values) {
+        offHeapDictionary.index(stringValue);
+      }
+      return offHeapDictionary.length();
     }
+  }
 
-    return dictionary;
+  @Benchmark
+  public int onHeapStringDictionaryWrite()
+      throws IOException {
+    try (StringOnHeapMutableDictionary onHeapDictionary = new StringOnHeapMutableDictionary()) {
+      for (String stringValue : _values) {
+        onHeapDictionary.index(stringValue);
+      }
+      return onHeapDictionary.length();
+    }
   }
 
   public static void main(String[] args)
       throws Exception {
-    ChainedOptionsBuilder opt =
-        new OptionsBuilder().include(BenchmarkStringDictionary.class.getSimpleName()).addProfiler(GCProfiler.class)
-            .addProfiler(HotspotMemoryProfiler.class).warmupTime(TimeValue.seconds(60)).warmupIterations(8)
-            .measurementTime(TimeValue.seconds(60)).measurementIterations(8).forks(5);
-
+    ChainedOptionsBuilder opt = new OptionsBuilder().include(BenchmarkStringDictionary.class.getSimpleName());
     new Runner(opt.build()).run();
   }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org