You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@druid.apache.org by gi...@apache.org on 2022/04/27 21:20:41 UTC

[druid] branch master updated: DimensionRangeShardSpec speed boost. (#12477)

This is an automated email from the ASF dual-hosted git repository.

gian pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git


The following commit(s) were added to refs/heads/master by this push:
     new 7b89682bbe DimensionRangeShardSpec speed boost. (#12477)
7b89682bbe is described below

commit 7b89682bbecc0feffb1996f90f6d8905458c054b
Author: Gian Merlino <gi...@imply.io>
AuthorDate: Wed Apr 27 14:20:35 2022 -0700

    DimensionRangeShardSpec speed boost. (#12477)
    
    * DimensionRangeShardSpec speed boost.
    
    Calling isEmpty() and equals() on RangeSets is expensive, because these
    fall back on default implementations that call size(). And size() is
    _also_ a default implementation that iterates the entire collection.
    
    * Fix and test from code review.
---
 .../timeline/DimensionRangeShardSpecBenchmark.java | 146 +++++++++++++++++++++
 .../partition/DimensionRangeShardSpec.java         |  40 +++---
 .../partition/DimensionRangeShardSpecTest.java     |  28 ++++
 3 files changed, 194 insertions(+), 20 deletions(-)

diff --git a/benchmarks/src/test/java/org/apache/druid/timeline/DimensionRangeShardSpecBenchmark.java b/benchmarks/src/test/java/org/apache/druid/timeline/DimensionRangeShardSpecBenchmark.java
new file mode 100644
index 0000000000..d3a1c4c765
--- /dev/null
+++ b/benchmarks/src/test/java/org/apache/druid/timeline/DimensionRangeShardSpecBenchmark.java
@@ -0,0 +1,146 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.druid.timeline;
+
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.ImmutableRangeSet;
+import com.google.common.collect.Range;
+import com.google.common.collect.RangeSet;
+import org.apache.druid.common.config.NullHandling;
+import org.apache.druid.data.input.StringTuple;
+import org.apache.druid.java.util.common.StringUtils;
+import org.apache.druid.query.filter.InDimFilter;
+import org.apache.druid.timeline.partition.DimensionRangeShardSpec;
+import org.junit.Assert;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OperationsPerInvocation;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Warmup;
+
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Random;
+import java.util.Set;
+import java.util.concurrent.TimeUnit;
+
+@State(Scope.Benchmark)
+@Fork(value = 1, jvmArgsAppend = {"-Xms128m", "-Xmx128m", "-XX:+UseG1GC"})
+@Warmup(iterations = 10)
+@Measurement(iterations = 10)
+@BenchmarkMode({Mode.AverageTime})
+@OutputTimeUnit(TimeUnit.NANOSECONDS)
+@OperationsPerInvocation(3) // 3 shard specs per invocation
+@SuppressWarnings("UnstableApiUsage") // Range, RangeSet are unstable APIs
+public class DimensionRangeShardSpecBenchmark
+{
+  private final Map<String, RangeSet<String>> domainSinglePrunable =
+      ImmutableMap.<String, RangeSet<String>>builder()
+                  .put("country", ImmutableRangeSet.of(Range.singleton("India")))
+                  .build();
+
+  // Initialized in @Setup method.
+  private Map<String, RangeSet<String>> domain5kPrunable;
+
+  private final Map<String, RangeSet<String>> domainSingleNonPrunable =
+      ImmutableMap.<String, RangeSet<String>>builder()
+                  .put("country", ImmutableRangeSet.of(Range.singleton("Shanghai")))
+                  .build();
+
+  // Initial segment (null -> values)
+  private final DimensionRangeShardSpec shardSpec0 = new DimensionRangeShardSpec(
+      Arrays.asList("country", "city"),
+      new StringTuple(new String[]{null, null}),
+      new StringTuple(new String[]{"Germany", "Munich"}),
+      0,
+      4
+  );
+
+  // Middle segment (values -> other values)
+  private final DimensionRangeShardSpec shardSpec1 = new DimensionRangeShardSpec(
+      Arrays.asList("country", "city"),
+      new StringTuple(new String[]{"Germany", "Munich"}),
+      new StringTuple(new String[]{"United States", "New York"}),
+      1,
+      4
+  );
+
+  // End segment (values -> null)
+  private final DimensionRangeShardSpec shardSpec2 = new DimensionRangeShardSpec(
+      Arrays.asList("country", "city"),
+      new StringTuple(new String[]{"United States", "New York"}),
+      new StringTuple(new String[]{null, null}),
+      2,
+      4
+  );
+
+  @Setup
+  public void setUp()
+  {
+    NullHandling.initializeForTests();
+
+    final Set<String> strings5k = new HashSet<>();
+
+    final Random random = new Random(0); // Random... ish.
+    for (int i = 0; i < 5000; i++) {
+      // Uppercase so the strings end up in different shardSpecs
+      final String s = StringUtils.format(
+          "%s%s",
+          String.valueOf((char) ('A' + random.nextInt(26))),
+          String.valueOf(random.nextInt())
+      );
+      strings5k.add(s);
+    }
+
+    final RangeSet<String> rangeSet5k = new InDimFilter("country", strings5k).getDimensionRangeSet("country");
+    domain5kPrunable = ImmutableMap.of("country", rangeSet5k);
+  }
+
+  @Benchmark
+  public void benchSinglePrunable()
+  {
+    Assert.assertFalse(shardSpec0.possibleInDomain(domainSinglePrunable));
+    Assert.assertTrue(shardSpec1.possibleInDomain(domainSinglePrunable));
+    Assert.assertFalse(shardSpec2.possibleInDomain(domainSinglePrunable));
+  }
+
+  @Benchmark
+  public void bench5kPrunable()
+  {
+    Assert.assertTrue(shardSpec0.possibleInDomain(domain5kPrunable));
+    Assert.assertTrue(shardSpec1.possibleInDomain(domain5kPrunable));
+    Assert.assertTrue(shardSpec2.possibleInDomain(domain5kPrunable));
+  }
+
+  @Benchmark
+  public void benchSingleNonPrunable()
+  {
+    Assert.assertTrue(shardSpec0.possibleInDomain(domainSingleNonPrunable));
+    Assert.assertTrue(shardSpec1.possibleInDomain(domainSingleNonPrunable));
+    Assert.assertTrue(shardSpec2.possibleInDomain(domainSingleNonPrunable));
+  }
+}
diff --git a/core/src/main/java/org/apache/druid/timeline/partition/DimensionRangeShardSpec.java b/core/src/main/java/org/apache/druid/timeline/partition/DimensionRangeShardSpec.java
index 543931a888..4054a618a7 100644
--- a/core/src/main/java/org/apache/druid/timeline/partition/DimensionRangeShardSpec.java
+++ b/core/src/main/java/org/apache/druid/timeline/partition/DimensionRangeShardSpec.java
@@ -29,6 +29,7 @@ import org.apache.druid.data.input.StringTuple;
 
 import javax.annotation.Nullable;
 import java.util.Collections;
+import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.Objects;
@@ -114,22 +115,6 @@ public class DimensionRangeShardSpec extends BaseDimensionRangeShardSpec
     return Collections.unmodifiableList(dimensions);
   }
 
-  /**
-   * Check if a given domain of Strings is a singleton set containing the given value
-   * @param rangeSet Domain of Strings
-   * @param val Value of String
-   * @return rangeSet == {val}
-   */
-  private boolean isRangeSetSingletonWithVal(RangeSet<String> rangeSet, String val)
-  {
-    if (val == null) {
-      return false;
-    }
-    return rangeSet.asRanges().equals(
-        Collections.singleton(Range.singleton(val))
-    );
-  }
-
   /**
    * Set[:i] is the cartesian product of Set[0],...,Set[i - 1]
    * EffectiveDomain[:i] is defined as QueryDomain[:i] INTERSECTION SegmentRange[:i]
@@ -223,16 +208,31 @@ public class DimensionRangeShardSpec extends BaseDimensionRangeShardSpec
 
       // EffectiveDomain[i] = QueryDomain[i] INTERSECTION SegmentRange[i]
       RangeSet<String> effectiveDomainForDimension = queryDomainForDimension.subRangeSet(rangeTillSegmentBoundary);
+
+      // Create an iterator to use for checking if the RangeSet is empty or is a singleton. This is significantly
+      // faster than using isEmpty() and equals(), because those methods call size() internally, which iterates
+      // the entire RangeSet.
+      final Iterator<Range<String>> effectiveDomainRangeIterator = effectiveDomainForDimension.asRanges().iterator();
+
       // Prune segment because query domain is out of segment range
-      if (effectiveDomainForDimension.isEmpty()) {
+      if (!effectiveDomainRangeIterator.hasNext()) {
         return false;
       }
 
-      // EffectiveDomain is singleton and lies only on the boundaries -> consider next dimensions
+      final Range<String> firstRange = effectiveDomainRangeIterator.next();
+      final boolean effectiveDomainIsSingleRange = !effectiveDomainRangeIterator.hasNext();
+
+      // Effective domain contained only one Range.
+      // If it's a singleton and lies only on the boundaries -> consider next dimensions
       effectiveDomainIsStart = effectiveDomainIsStart
-                                && isRangeSetSingletonWithVal(effectiveDomainForDimension, segmentStart.get(i));
+                               && effectiveDomainIsSingleRange
+                               && segmentStart.get(i) != null
+                               && firstRange.equals(Range.singleton(segmentStart.get(i)));
+
       effectiveDomainIsEnd = effectiveDomainIsEnd
-                           && isRangeSetSingletonWithVal(effectiveDomainForDimension, segmentEnd.get(i));
+                             && effectiveDomainIsSingleRange
+                             && segmentEnd.get(i) != null
+                             && firstRange.equals(Range.singleton(segmentEnd.get(i)));
 
       // EffectiveDomain lies within the boundaries as well -> cannot prune based on next dimensions
       if (!effectiveDomainIsStart && !effectiveDomainIsEnd) {
diff --git a/core/src/test/java/org/apache/druid/timeline/partition/DimensionRangeShardSpecTest.java b/core/src/test/java/org/apache/druid/timeline/partition/DimensionRangeShardSpecTest.java
index 0e29057a54..9c9fe99e95 100644
--- a/core/src/test/java/org/apache/druid/timeline/partition/DimensionRangeShardSpecTest.java
+++ b/core/src/test/java/org/apache/druid/timeline/partition/DimensionRangeShardSpecTest.java
@@ -415,6 +415,34 @@ public class DimensionRangeShardSpecTest
     );
     assertFalse(shard.possibleInDomain(domain));
   }
+  @Test
+  public void testPossibleInDomain_falsePruning()
+  {
+    setDimensions("planet", "country", "city");
+
+    final StringTuple start = StringTuple.create("Earth", "France", "Paris");
+    final StringTuple end = StringTuple.create("Mars", "USA", "New York");
+
+    final RangeSet<String> universalSet = TreeRangeSet.create();
+    universalSet.add(Range.all());
+
+    ShardSpec shard = new DimensionRangeShardSpec(dimensions, start, end, 0, null);
+    Map<String, RangeSet<String>> domain = new HashMap<>();
+
+    // {Earth} U {Mars} * (USA, INF) * (-INF, INF)
+    populateDomain(
+        domain,
+        getUnion(
+            getRangeSet(Range.singleton("Earth")),
+            getRangeSet(Range.singleton("Mars"))
+        ),
+        getUnion(
+            getRangeSet(Range.greaterThan("USA"))
+        ),
+        universalSet
+    );
+    assertTrue(shard.possibleInDomain(domain));
+  }
 
   private RangeSet<String> getRangeSet(Range range)
   {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@druid.apache.org
For additional commands, e-mail: commits-help@druid.apache.org