You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@druid.apache.org by cw...@apache.org on 2019/09/05 07:09:07 UTC

[incubator-druid] branch 0.16.0-incubating updated: Speed up StringDimensionIndexer.estimateEncodedKeyComponentSize (#8466) (#8467)

This is an automated email from the ASF dual-hosted git repository.

cwylie pushed a commit to branch 0.16.0-incubating
in repository https://gitbox.apache.org/repos/asf/incubator-druid.git


The following commit(s) were added to refs/heads/0.16.0-incubating by this push:
     new 6b49a27  Speed up StringDimensionIndexer.estimateEncodedKeyComponentSize (#8466) (#8467)
6b49a27 is described below

commit 6b49a2727fa6a90ef88fa1697037390bf6b1fbc0
Author: Clint Wylie <cw...@apache.org>
AuthorDate: Thu Sep 5 00:08:57 2019 -0700

    Speed up StringDimensionIndexer.estimateEncodedKeyComponentSize (#8466) (#8467)
    
    * Speed up StringDimensionIndexer.estimateEncodedKeyComponentSize
    
    * Remove print
    
    * Move benchmark, add header
---
 .../indexing/StringDimensionIndexerBenchmark.java  | 78 ++++++++++++++++++++++
 .../druid/segment/StringDimensionIndexer.java      | 12 ++--
 2 files changed, 86 insertions(+), 4 deletions(-)

diff --git a/benchmarks/src/main/java/org/apache/druid/benchmark/indexing/StringDimensionIndexerBenchmark.java b/benchmarks/src/main/java/org/apache/druid/benchmark/indexing/StringDimensionIndexerBenchmark.java
new file mode 100644
index 0000000..eacfc65
--- /dev/null
+++ b/benchmarks/src/main/java/org/apache/druid/benchmark/indexing/StringDimensionIndexerBenchmark.java
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.benchmark.indexing;
+
+import org.apache.druid.data.input.impl.DimensionSchema;
+import org.apache.druid.segment.StringDimensionIndexer;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Warmup;
+import org.openjdk.jmh.infra.Blackhole;
+
+import java.util.concurrent.TimeUnit;
+
+@State(Scope.Benchmark)
+@Fork(value = 1)
+@Warmup(iterations = 10)
+@Measurement(iterations = 10)
+public class StringDimensionIndexerBenchmark // JMH benchmark of StringDimensionIndexer.estimateEncodedKeyComponentSize
+{
+  StringDimensionIndexer indexer; // indexer under test; created and filled in setup()
+  int[] exampleArray; // argument passed to estimateEncodedKeyComponentSize on every invocation
+
+  @Param({"10000"})
+  public int cardinality; // number of distinct "abcd-<i>" strings fed to the indexer in setup()
+
+  @Param({"8"})
+  public int rowSize; // length of exampleArray
+
+  @Setup
+  public void setup() // JMH setup hook: builds the indexer and the example key array
+  {
+    indexer = new StringDimensionIndexer(DimensionSchema.MultiValueHandling.ofDefault(), true);
+
+    for (int i = 0; i < cardinality; i++) { // feed `cardinality` distinct string values to the indexer
+      indexer.processRowValsToUnsortedEncodedKeyComponent("abcd-" + i, true);
+    }
+
+    exampleArray = new int[rowSize];
+    int stride = cardinality / rowSize; // integer division; keeps every i * stride below cardinality
+    for (int i = 0; i < rowSize; i++) {
+      exampleArray[i] = i * stride; // presumably a valid dictionary id (assumes ids run 0..cardinality-1) - TODO confirm
+    }
+  }
+
+  @Benchmark
+  @BenchmarkMode(Mode.AverageTime)
+  @OutputTimeUnit(TimeUnit.MICROSECONDS)
+  public void estimateEncodedKeyComponentSize(Blackhole blackhole) // one size-estimate call per invocation
+  {
+    long sz = indexer.estimateEncodedKeyComponentSize(exampleArray);
+    blackhole.consume(sz); // pass the result to the JMH Blackhole so the computed value is used
+  }
+}
diff --git a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java
index 5bf20b9..f08a495 100644
--- a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java
+++ b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java
@@ -308,10 +308,14 @@ public class StringDimensionIndexer implements DimensionIndexer<Integer, int[],
     // even though they are stored just once. It may overestimate the size by a bit, but we wanted to leave
     // more buffer to be safe
     long estimatedSize = key.length * Integer.BYTES;
-    estimatedSize += Arrays.stream(key)
-                           .filter(element -> dimLookup.getValue(element) != null)
-                           .mapToLong(element -> dimLookup.getValue(element).length() * Character.BYTES)
-                           .sum();
+    long totalChars = 0;
+    for (int element : key) {
+      String val = dimLookup.getValue(element);
+      if (val != null) {
+        totalChars += val.length();
+      }
+    }
+    estimatedSize += totalChars * Character.BYTES;
     return estimatedSize;
   }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@druid.apache.org
For additional commands, e-mail: commits-help@druid.apache.org