You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@druid.apache.org by jo...@apache.org on 2019/09/12 23:14:36 UTC
[incubator-druid] branch 0.14.0-incubating updated: Speed up
StringDimensionIndexer.estimateEncodedKeyComponentSize (#8466)
This is an automated email from the ASF dual-hosted git repository.
jonwei pushed a commit to branch 0.14.0-incubating
in repository https://gitbox.apache.org/repos/asf/incubator-druid.git
The following commit(s) were added to refs/heads/0.14.0-incubating by this push:
new 3d8b1bc Speed up StringDimensionIndexer.estimateEncodedKeyComponentSize (#8466)
3d8b1bc is described below
commit 3d8b1bca033bedc3effa93b7579f96121346a428
Author: Jonathan Wei <jo...@users.noreply.github.com>
AuthorDate: Wed Sep 4 20:26:04 2019 -0700
Speed up StringDimensionIndexer.estimateEncodedKeyComponentSize (#8466)
* Speed up StringDimensionIndexer.estimateEncodedKeyComponentSize
* Remove print
* Move benchmark, add header
---
.../indexing/StringDimensionIndexerBenchmark.java | 78 ++++++++++++++++++++++
.../druid/segment/StringDimensionIndexer.java | 12 ++--
2 files changed, 86 insertions(+), 4 deletions(-)
diff --git a/benchmarks/src/main/java/org/apache/druid/benchmark/indexing/StringDimensionIndexerBenchmark.java b/benchmarks/src/main/java/org/apache/druid/benchmark/indexing/StringDimensionIndexerBenchmark.java
new file mode 100644
index 0000000..eacfc65
--- /dev/null
+++ b/benchmarks/src/main/java/org/apache/druid/benchmark/indexing/StringDimensionIndexerBenchmark.java
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.benchmark.indexing;
+
+import org.apache.druid.data.input.impl.DimensionSchema;
+import org.apache.druid.segment.StringDimensionIndexer;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Warmup;
+import org.openjdk.jmh.infra.Blackhole;
+
+import java.util.concurrent.TimeUnit;
+
+@State(Scope.Benchmark)
+@Fork(value = 1)
+@Warmup(iterations = 10)
+@Measurement(iterations = 10)
+public class StringDimensionIndexerBenchmark
+{
+ StringDimensionIndexer indexer;
+ int[] exampleArray;
+
+ @Param({"10000"})
+ public int cardinality;
+
+ @Param({"8"})
+ public int rowSize;
+
+ @Setup
+ public void setup()
+ {
+ indexer = new StringDimensionIndexer(DimensionSchema.MultiValueHandling.ofDefault(), true);
+
+ for (int i = 0; i < cardinality; i++) {
+ indexer.processRowValsToUnsortedEncodedKeyComponent("abcd-" + i, true);
+ }
+
+ exampleArray = new int[rowSize];
+ int stride = cardinality / rowSize;
+ for (int i = 0; i < rowSize; i++) {
+ exampleArray[i] = i * stride;
+ }
+ }
+
+ @Benchmark
+ @BenchmarkMode(Mode.AverageTime)
+ @OutputTimeUnit(TimeUnit.MICROSECONDS)
+ public void estimateEncodedKeyComponentSize(Blackhole blackhole)
+ {
+ long sz = indexer.estimateEncodedKeyComponentSize(exampleArray);
+ blackhole.consume(sz);
+ }
+}
diff --git a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java
index cf201e4..3bedd0d 100644
--- a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java
+++ b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java
@@ -305,10 +305,14 @@ public class StringDimensionIndexer implements DimensionIndexer<Integer, int[],
// even though they are stored just once. It may overestimate the size by a bit, but we wanted to leave
// more buffer to be safe
long estimatedSize = key.length * Integer.BYTES;
- estimatedSize += Arrays.stream(key)
- .filter(element -> dimLookup.getValue(element) != null)
- .mapToLong(element -> dimLookup.getValue(element).length() * Character.BYTES)
- .sum();
+ long totalChars = 0;
+ for (int element : key) {
+ String val = dimLookup.getValue(element);
+ if (val != null) {
+ totalChars += val.length();
+ }
+ }
+ estimatedSize += totalChars * Character.BYTES;
return estimatedSize;
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@druid.apache.org
For additional commands, e-mail: commits-help@druid.apache.org