You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@druid.apache.org by gi...@apache.org on 2022/04/27 21:20:41 UTC
[druid] branch master updated: DimensionRangeShardSpec speed boost. (#12477)
This is an automated email from the ASF dual-hosted git repository.
gian pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git
The following commit(s) were added to refs/heads/master by this push:
new 7b89682bbe DimensionRangeShardSpec speed boost. (#12477)
7b89682bbe is described below
commit 7b89682bbecc0feffb1996f90f6d8905458c054b
Author: Gian Merlino <gi...@imply.io>
AuthorDate: Wed Apr 27 14:20:35 2022 -0700
DimensionRangeShardSpec speed boost. (#12477)
* DimensionRangeShardSpec speed boost.
Calling isEmpty() and equals() on RangeSets is expensive, because these
fall back on default implementations that call size(). And size() is
_also_ a default implementation that iterates the entire collection.
* Fix and test from code review.
---
.../timeline/DimensionRangeShardSpecBenchmark.java | 146 +++++++++++++++++++++
.../partition/DimensionRangeShardSpec.java | 40 +++---
.../partition/DimensionRangeShardSpecTest.java | 28 ++++
3 files changed, 194 insertions(+), 20 deletions(-)
diff --git a/benchmarks/src/test/java/org/apache/druid/timeline/DimensionRangeShardSpecBenchmark.java b/benchmarks/src/test/java/org/apache/druid/timeline/DimensionRangeShardSpecBenchmark.java
new file mode 100644
index 0000000000..d3a1c4c765
--- /dev/null
+++ b/benchmarks/src/test/java/org/apache/druid/timeline/DimensionRangeShardSpecBenchmark.java
@@ -0,0 +1,146 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.timeline;
+
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.ImmutableRangeSet;
+import com.google.common.collect.Range;
+import com.google.common.collect.RangeSet;
+import org.apache.druid.common.config.NullHandling;
+import org.apache.druid.data.input.StringTuple;
+import org.apache.druid.java.util.common.StringUtils;
+import org.apache.druid.query.filter.InDimFilter;
+import org.apache.druid.timeline.partition.DimensionRangeShardSpec;
+import org.junit.Assert;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OperationsPerInvocation;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Warmup;
+
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Random;
+import java.util.Set;
+import java.util.concurrent.TimeUnit;
+
+@State(Scope.Benchmark)
+@Fork(value = 1, jvmArgsAppend = {"-Xms128m", "-Xmx128m", "-XX:+UseG1GC"})
+@Warmup(iterations = 10)
+@Measurement(iterations = 10)
+@BenchmarkMode({Mode.AverageTime})
+@OutputTimeUnit(TimeUnit.NANOSECONDS)
+@OperationsPerInvocation(3) // 3 shard specs per invocation
+@SuppressWarnings("UnstableApiUsage") // Range, RangeSet are unstable APIs
+public class DimensionRangeShardSpecBenchmark
+{
+ private final Map<String, RangeSet<String>> domainSinglePrunable =
+ ImmutableMap.<String, RangeSet<String>>builder()
+ .put("country", ImmutableRangeSet.of(Range.singleton("India")))
+ .build();
+
+ // Initialized in @Setup method.
+ private Map<String, RangeSet<String>> domain5kPrunable;
+
+ private final Map<String, RangeSet<String>> domainSingleNonPrunable =
+ ImmutableMap.<String, RangeSet<String>>builder()
+ .put("country", ImmutableRangeSet.of(Range.singleton("Shanghai")))
+ .build();
+
+ // Initial segment (null -> values)
+ private final DimensionRangeShardSpec shardSpec0 = new DimensionRangeShardSpec(
+ Arrays.asList("country", "city"),
+ new StringTuple(new String[]{null, null}),
+ new StringTuple(new String[]{"Germany", "Munich"}),
+ 0,
+ 4
+ );
+
+ // Middle segment (values -> other values)
+ private final DimensionRangeShardSpec shardSpec1 = new DimensionRangeShardSpec(
+ Arrays.asList("country", "city"),
+ new StringTuple(new String[]{"Germany", "Munich"}),
+ new StringTuple(new String[]{"United States", "New York"}),
+ 1,
+ 4
+ );
+
+ // End segment (values -> null)
+ private final DimensionRangeShardSpec shardSpec2 = new DimensionRangeShardSpec(
+ Arrays.asList("country", "city"),
+ new StringTuple(new String[]{"United States", "New York"}),
+ new StringTuple(new String[]{null, null}),
+ 2,
+ 4
+ );
+
+ @Setup
+ public void setUp()
+ {
+ NullHandling.initializeForTests();
+
+ final Set<String> strings5k = new HashSet<>();
+
+ final Random random = new Random(0); // Random... ish.
+ for (int i = 0; i < 5000; i++) {
+ // Uppercase so the strings end up in different shardSpecs
+ final String s = StringUtils.format(
+ "%s%s",
+ String.valueOf((char) ('A' + random.nextInt(26))),
+ String.valueOf(random.nextInt())
+ );
+ strings5k.add(s);
+ }
+
+ final RangeSet<String> rangeSet5k = new InDimFilter("country", strings5k).getDimensionRangeSet("country");
+ domain5kPrunable = ImmutableMap.of("country", rangeSet5k);
+ }
+
+ @Benchmark
+ public void benchSinglePrunable()
+ {
+ Assert.assertFalse(shardSpec0.possibleInDomain(domainSinglePrunable));
+ Assert.assertTrue(shardSpec1.possibleInDomain(domainSinglePrunable));
+ Assert.assertFalse(shardSpec2.possibleInDomain(domainSinglePrunable));
+ }
+
+ @Benchmark
+ public void bench5kPrunable()
+ {
+ Assert.assertTrue(shardSpec0.possibleInDomain(domain5kPrunable));
+ Assert.assertTrue(shardSpec1.possibleInDomain(domain5kPrunable));
+ Assert.assertTrue(shardSpec2.possibleInDomain(domain5kPrunable));
+ }
+
+ @Benchmark
+ public void benchSingleNonPrunable()
+ {
+ Assert.assertTrue(shardSpec0.possibleInDomain(domainSingleNonPrunable));
+ Assert.assertTrue(shardSpec1.possibleInDomain(domainSingleNonPrunable));
+ Assert.assertTrue(shardSpec2.possibleInDomain(domainSingleNonPrunable));
+ }
+}
diff --git a/core/src/main/java/org/apache/druid/timeline/partition/DimensionRangeShardSpec.java b/core/src/main/java/org/apache/druid/timeline/partition/DimensionRangeShardSpec.java
index 543931a888..4054a618a7 100644
--- a/core/src/main/java/org/apache/druid/timeline/partition/DimensionRangeShardSpec.java
+++ b/core/src/main/java/org/apache/druid/timeline/partition/DimensionRangeShardSpec.java
@@ -29,6 +29,7 @@ import org.apache.druid.data.input.StringTuple;
import javax.annotation.Nullable;
import java.util.Collections;
+import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
@@ -114,22 +115,6 @@ public class DimensionRangeShardSpec extends BaseDimensionRangeShardSpec
return Collections.unmodifiableList(dimensions);
}
- /**
- * Check if a given domain of Strings is a singleton set containing the given value
- * @param rangeSet Domain of Strings
- * @param val Value of String
- * @return rangeSet == {val}
- */
- private boolean isRangeSetSingletonWithVal(RangeSet<String> rangeSet, String val)
- {
- if (val == null) {
- return false;
- }
- return rangeSet.asRanges().equals(
- Collections.singleton(Range.singleton(val))
- );
- }
-
/**
* Set[:i] is the cartesian product of Set[0],...,Set[i - 1]
* EffectiveDomain[:i] is defined as QueryDomain[:i] INTERSECTION SegmentRange[:i]
@@ -223,16 +208,31 @@ public class DimensionRangeShardSpec extends BaseDimensionRangeShardSpec
// EffectiveDomain[i] = QueryDomain[i] INTERSECTION SegmentRange[i]
RangeSet<String> effectiveDomainForDimension = queryDomainForDimension.subRangeSet(rangeTillSegmentBoundary);
+
+ // Create an iterator to use for checking if the RangeSet is empty or is a singleton. This is significantly
+ // faster than using isEmpty() and equals(), because those methods call size() internally, which iterates
+ // the entire RangeSet.
+ final Iterator<Range<String>> effectiveDomainRangeIterator = effectiveDomainForDimension.asRanges().iterator();
+
// Prune segment because query domain is out of segment range
- if (effectiveDomainForDimension.isEmpty()) {
+ if (!effectiveDomainRangeIterator.hasNext()) {
return false;
}
- // EffectiveDomain is singleton and lies only on the boundaries -> consider next dimensions
+ final Range<String> firstRange = effectiveDomainRangeIterator.next();
+ final boolean effectiveDomainIsSingleRange = !effectiveDomainRangeIterator.hasNext();
+
+ // Effective domain contained only one Range.
+ // If it's a singleton and lies only on the boundaries -> consider next dimensions
effectiveDomainIsStart = effectiveDomainIsStart
- && isRangeSetSingletonWithVal(effectiveDomainForDimension, segmentStart.get(i));
+ && effectiveDomainIsSingleRange
+ && segmentStart.get(i) != null
+ && firstRange.equals(Range.singleton(segmentStart.get(i)));
+
effectiveDomainIsEnd = effectiveDomainIsEnd
- && isRangeSetSingletonWithVal(effectiveDomainForDimension, segmentEnd.get(i));
+ && effectiveDomainIsSingleRange
+ && segmentEnd.get(i) != null
+ && firstRange.equals(Range.singleton(segmentEnd.get(i)));
// EffectiveDomain lies within the boundaries as well -> cannot prune based on next dimensions
if (!effectiveDomainIsStart && !effectiveDomainIsEnd) {
diff --git a/core/src/test/java/org/apache/druid/timeline/partition/DimensionRangeShardSpecTest.java b/core/src/test/java/org/apache/druid/timeline/partition/DimensionRangeShardSpecTest.java
index 0e29057a54..9c9fe99e95 100644
--- a/core/src/test/java/org/apache/druid/timeline/partition/DimensionRangeShardSpecTest.java
+++ b/core/src/test/java/org/apache/druid/timeline/partition/DimensionRangeShardSpecTest.java
@@ -415,6 +415,34 @@ public class DimensionRangeShardSpecTest
);
assertFalse(shard.possibleInDomain(domain));
}
+ @Test
+ public void testPossibleInDomain_falsePruning()
+ {
+ setDimensions("planet", "country", "city");
+
+ final StringTuple start = StringTuple.create("Earth", "France", "Paris");
+ final StringTuple end = StringTuple.create("Mars", "USA", "New York");
+
+ final RangeSet<String> universalSet = TreeRangeSet.create();
+ universalSet.add(Range.all());
+
+ ShardSpec shard = new DimensionRangeShardSpec(dimensions, start, end, 0, null);
+ Map<String, RangeSet<String>> domain = new HashMap<>();
+
+ // {Earth} U {Mars} * (USA, INF) * (-INF, INF)
+ populateDomain(
+ domain,
+ getUnion(
+ getRangeSet(Range.singleton("Earth")),
+ getRangeSet(Range.singleton("Mars"))
+ ),
+ getUnion(
+ getRangeSet(Range.greaterThan("USA"))
+ ),
+ universalSet
+ );
+ assertTrue(shard.possibleInDomain(domain));
+ }
private RangeSet<String> getRangeSet(Range range)
{
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@druid.apache.org
For additional commands, e-mail: commits-help@druid.apache.org