You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@druid.apache.org by gi...@apache.org on 2020/08/07 01:34:44 UTC

[druid] branch master updated: Combine InDimFilter, InFilter. (#10119)

This is an automated email from the ASF dual-hosted git repository.

gian pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/druid.git


The following commit(s) were added to refs/heads/master by this push:
     new 1700317  Combine InDimFilter, InFilter. (#10119)
1700317 is described below

commit 170031744e8a2ed5fc4c06062df014bcfcfe2e1f
Author: Gian Merlino <gi...@gmail.com>
AuthorDate: Thu Aug 6 18:34:21 2020 -0700

    Combine InDimFilter, InFilter. (#10119)
    
    * Combine InDimFilter, InFilter.
    
    There are two motivations:
    
    1. Ensure that when HashJoinSegmentStorageAdapter compares its Filter
       to the original one, and it is an "in" type, the comparison is by
       reference and does not need to check deep equality. This is useful
       when the "in" filter is very large.
    2. Simplify things. (There isn't a great reason for the DimFilter and
       Filter logic to be separate, and combining them reduces some
       duplication.)
    
    * Fix test.
---
 .../org/apache/druid/query/filter/InDimFilter.java | 445 ++++++++++++++++-----
 .../org/apache/druid/segment/filter/Filters.java   |   4 +-
 .../org/apache/druid/segment/filter/InFilter.java  | 336 ----------------
 .../join/HashJoinSegmentStorageAdapter.java        |   3 +-
 .../apache/druid/segment/filter/InFilterTest.java  |  22 +-
 5 files changed, 352 insertions(+), 458 deletions(-)

diff --git a/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java b/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java
index 25688ba..8e13cf1 100644
--- a/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java
+++ b/processing/src/main/java/org/apache/druid/query/filter/InDimFilter.java
@@ -26,6 +26,7 @@ import com.fasterxml.jackson.annotation.JsonProperty;
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Joiner;
 import com.google.common.base.Preconditions;
+import com.google.common.base.Predicate;
 import com.google.common.base.Supplier;
 import com.google.common.base.Suppliers;
 import com.google.common.collect.ImmutableSet;
@@ -37,17 +38,29 @@ import com.google.common.collect.TreeRangeSet;
 import com.google.common.hash.Hasher;
 import com.google.common.hash.Hashing;
 import it.unimi.dsi.fastutil.ints.IntArrayList;
+import it.unimi.dsi.fastutil.ints.IntIterable;
+import it.unimi.dsi.fastutil.ints.IntIterator;
 import it.unimi.dsi.fastutil.ints.IntOpenHashSet;
 import it.unimi.dsi.fastutil.longs.LongArrayList;
 import it.unimi.dsi.fastutil.longs.LongOpenHashSet;
+import org.apache.druid.collections.bitmap.ImmutableBitmap;
 import org.apache.druid.common.config.NullHandling;
+import org.apache.druid.java.util.common.IAE;
 import org.apache.druid.java.util.common.StringUtils;
+import org.apache.druid.query.BitmapResultFactory;
 import org.apache.druid.query.cache.CacheKeyBuilder;
 import org.apache.druid.query.extraction.ExtractionFn;
+import org.apache.druid.query.filter.vector.VectorValueMatcher;
+import org.apache.druid.query.filter.vector.VectorValueMatcherColumnProcessorFactory;
 import org.apache.druid.query.lookup.LookupExtractionFn;
 import org.apache.druid.query.lookup.LookupExtractor;
+import org.apache.druid.segment.ColumnSelector;
+import org.apache.druid.segment.ColumnSelectorFactory;
 import org.apache.druid.segment.DimensionHandlerUtils;
-import org.apache.druid.segment.filter.InFilter;
+import org.apache.druid.segment.IntIteratorUtils;
+import org.apache.druid.segment.column.BitmapIndex;
+import org.apache.druid.segment.filter.Filters;
+import org.apache.druid.segment.vector.VectorColumnSelectorFactory;
 
 import javax.annotation.Nullable;
 import java.nio.charset.StandardCharsets;
@@ -56,12 +69,14 @@ import java.util.Arrays;
 import java.util.Collection;
 import java.util.Comparator;
 import java.util.HashSet;
+import java.util.Iterator;
 import java.util.List;
+import java.util.Map;
 import java.util.Objects;
 import java.util.Set;
 import java.util.stream.Collectors;
 
-public class InDimFilter extends AbstractOptimizableDimFilter implements DimFilter
+public class InDimFilter extends AbstractOptimizableDimFilter implements Filter
 {
   // determined through benchmark that binary search on long[] is faster than HashSet until ~16 elements
   // Hashing threshold is not applied to String for now, String still uses ImmutableSortedSet
@@ -74,12 +89,10 @@ public class InDimFilter extends AbstractOptimizableDimFilter implements DimFilt
   private final ExtractionFn extractionFn;
   @Nullable
   private final FilterTuning filterTuning;
-  private final Supplier<DruidLongPredicate> longPredicateSupplier;
-  private final Supplier<DruidFloatPredicate> floatPredicateSupplier;
-  private final Supplier<DruidDoublePredicate> doublePredicateSupplier;
+  private final DruidPredicateFactory predicateFactory;
 
   @JsonIgnore
-  private byte[] cacheKey;
+  private final Supplier<byte[]> cacheKeySupplier;
 
   @JsonCreator
   public InDimFilter(
@@ -91,8 +104,42 @@ public class InDimFilter extends AbstractOptimizableDimFilter implements DimFilt
       @JsonProperty("filterTuning") @Nullable FilterTuning filterTuning
   )
   {
-    Preconditions.checkNotNull(dimension, "dimension can not be null");
-    Preconditions.checkArgument(values != null, "values can not be null");
+    this(
+        dimension,
+        values,
+        extractionFn,
+        filterTuning,
+        null
+    );
+  }
+
+  /**
+   * This constructor should be called only in unit tests since accepting a Collection makes copying more likely.
+   */
+  @VisibleForTesting
+  public InDimFilter(String dimension, Collection<String> values, @Nullable ExtractionFn extractionFn)
+  {
+    this(
+        dimension,
+        values instanceof Set ? (Set<String>) values : new HashSet<>(values),
+        extractionFn,
+        null,
+        null
+    );
+  }
+
+  /**
+   * Internal constructor.
+   */
+  private InDimFilter(
+      final String dimension,
+      final Set<String> values,
+      @Nullable final ExtractionFn extractionFn,
+      @Nullable final FilterTuning filterTuning,
+      @Nullable final DruidPredicateFactory predicateFactory
+  )
+  {
+    Preconditions.checkNotNull(values, "values cannot be null");
 
     // The values set can be huge. Try to avoid copying the set if possible.
     // Note that we may still need to copy values to a list for caching. See getCacheKey().
@@ -101,21 +148,18 @@ public class InDimFilter extends AbstractOptimizableDimFilter implements DimFilt
     } else {
       this.values = values.stream().map(NullHandling::emptyToNullIfNeeded).collect(Collectors.toSet());
     }
-    this.dimension = dimension;
+
+    this.dimension = Preconditions.checkNotNull(dimension, "dimension cannot be null");
     this.extractionFn = extractionFn;
     this.filterTuning = filterTuning;
-    this.longPredicateSupplier = getLongPredicateSupplier();
-    this.floatPredicateSupplier = getFloatPredicateSupplier();
-    this.doublePredicateSupplier = getDoublePredicateSupplier();
-  }
 
-  /**
-   * This constructor should be called only in unit tests since it creates a new hash set wrapping the given values.
-   */
-  @VisibleForTesting
-  public InDimFilter(String dimension, Collection<String> values, @Nullable ExtractionFn extractionFn)
-  {
-    this(dimension, new HashSet<>(values), extractionFn, null);
+    if (predicateFactory != null) {
+      this.predicateFactory = predicateFactory;
+    } else {
+      this.predicateFactory = new InFilterDruidPredicateFactory(extractionFn, this.values);
+    }
+
+    this.cacheKeySupplier = Suppliers.memoize(this::computeCacheKey);
   }
 
   @JsonProperty
@@ -149,26 +193,7 @@ public class InDimFilter extends AbstractOptimizableDimFilter implements DimFilt
   @Override
   public byte[] getCacheKey()
   {
-    if (cacheKey == null) {
-      final List<String> sortedValues = new ArrayList<>(values);
-      sortedValues.sort(Comparator.nullsFirst(Ordering.natural()));
-      final Hasher hasher = Hashing.sha256().newHasher();
-      for (String v : sortedValues) {
-        if (v == null) {
-          hasher.putInt(0);
-        } else {
-          hasher.putString(v, StandardCharsets.UTF_8);
-        }
-      }
-      cacheKey = new CacheKeyBuilder(DimFilterUtils.IN_CACHE_ID)
-          .appendString(dimension)
-          .appendByte(DimFilterUtils.STRING_SEPARATOR)
-          .appendByteArray(extractionFn == null ? new byte[0] : extractionFn.getCacheKey())
-          .appendByte(DimFilterUtils.STRING_SEPARATOR)
-          .appendByteArray(hasher.hash().asBytes())
-          .build();
-    }
-    return cacheKey;
+    return cacheKeySupplier.get();
   }
 
   @Override
@@ -190,55 +215,10 @@ public class InDimFilter extends AbstractOptimizableDimFilter implements DimFilt
     return inFilter;
   }
 
-  private InDimFilter optimizeLookup()
-  {
-    if (extractionFn instanceof LookupExtractionFn
-        && ((LookupExtractionFn) extractionFn).isOptimize()) {
-      LookupExtractionFn exFn = (LookupExtractionFn) extractionFn;
-      LookupExtractor lookup = exFn.getLookup();
-
-      final Set<String> keys = new HashSet<>();
-      for (String value : values) {
-
-        // We cannot do an unapply()-based optimization if the selector value
-        // and the replaceMissingValuesWith value are the same, since we have to match on
-        // all values that are not present in the lookup.
-        final String convertedValue = NullHandling.emptyToNullIfNeeded(value);
-        if (!exFn.isRetainMissingValue() && Objects.equals(convertedValue, exFn.getReplaceMissingValueWith())) {
-          return this;
-        }
-        keys.addAll(lookup.unapply(convertedValue));
-
-        // If retainMissingValues is true and the selector value is not in the lookup map,
-        // there may be row values that match the selector value but are not included
-        // in the lookup map. Match on the selector value as well.
-        // If the selector value is overwritten in the lookup map, don't add selector value to keys.
-        if (exFn.isRetainMissingValue() && NullHandling.isNullOrEquivalent(lookup.apply(convertedValue))) {
-          keys.add(convertedValue);
-        }
-      }
-
-      if (keys.isEmpty()) {
-        return this;
-      } else {
-        return new InDimFilter(dimension, keys, null, filterTuning);
-      }
-    }
-    return this;
-  }
-
   @Override
   public Filter toFilter()
   {
-    return new InFilter(
-        dimension,
-        values,
-        longPredicateSupplier,
-        floatPredicateSupplier,
-        doublePredicateSupplier,
-        extractionFn,
-        filterTuning
-    );
+    return this;
   }
 
   @Nullable
@@ -270,6 +250,108 @@ public class InDimFilter extends AbstractOptimizableDimFilter implements DimFilt
   }
 
   @Override
+  public <T> T getBitmapResult(BitmapIndexSelector selector, BitmapResultFactory<T> bitmapResultFactory)
+  {
+    if (extractionFn == null) {
+      final BitmapIndex bitmapIndex = selector.getBitmapIndex(dimension);
+      return bitmapResultFactory.unionDimensionValueBitmaps(getBitmapIterable(values, bitmapIndex));
+    } else {
+      return Filters.matchPredicate(
+          dimension,
+          selector,
+          bitmapResultFactory,
+          predicateFactory.makeStringPredicate()
+      );
+    }
+  }
+
+  @Override
+  public double estimateSelectivity(BitmapIndexSelector indexSelector)
+  {
+    if (extractionFn == null) {
+      final BitmapIndex bitmapIndex = indexSelector.getBitmapIndex(dimension);
+      return Filters.estimateSelectivity(
+          bitmapIndex,
+          IntIteratorUtils.toIntList(getBitmapIndexIterable(values, bitmapIndex).iterator()),
+          indexSelector.getNumRows()
+      );
+    } else {
+      return Filters.estimateSelectivity(
+          dimension,
+          indexSelector,
+          predicateFactory.makeStringPredicate()
+      );
+    }
+  }
+
+  @Override
+  public ValueMatcher makeMatcher(ColumnSelectorFactory factory)
+  {
+    return Filters.makeValueMatcher(factory, dimension, predicateFactory);
+  }
+
+  @Override
+  public VectorValueMatcher makeVectorMatcher(final VectorColumnSelectorFactory factory)
+  {
+    return DimensionHandlerUtils.makeVectorProcessor(
+        dimension,
+        VectorValueMatcherColumnProcessorFactory.instance(),
+        factory
+    ).makeMatcher(predicateFactory);
+  }
+
+  @Override
+  public boolean canVectorizeMatcher()
+  {
+    return true;
+  }
+
+  @Override
+  public boolean supportsRequiredColumnRewrite()
+  {
+    return true;
+  }
+
+  @Override
+  public Filter rewriteRequiredColumns(Map<String, String> columnRewrites)
+  {
+    String rewriteDimensionTo = columnRewrites.get(dimension);
+    if (rewriteDimensionTo == null) {
+      throw new IAE("Received a non-applicable rewrite: %s, filter's dimension: %s", columnRewrites, dimension);
+    }
+
+    if (rewriteDimensionTo.equals(dimension)) {
+      return this;
+    } else {
+      return new InDimFilter(
+          rewriteDimensionTo,
+          values,
+          extractionFn,
+          filterTuning,
+          predicateFactory
+      );
+    }
+  }
+
+  @Override
+  public boolean supportsBitmapIndex(BitmapIndexSelector selector)
+  {
+    return selector.getBitmapIndex(dimension) != null;
+  }
+
+  @Override
+  public boolean shouldUseBitmapIndex(BitmapIndexSelector selector)
+  {
+    return Filters.shouldUseBitmapIndex(this, selector, filterTuning);
+  }
+
+  @Override
+  public boolean supportsSelectivityEstimation(ColumnSelector columnSelector, BitmapIndexSelector indexSelector)
+  {
+    return Filters.supportsSelectivityEstimation(this, dimension, columnSelector, indexSelector);
+  }
+
+  @Override
   public String toString()
   {
     final DimFilterToStringBuilder builder = new DimFilterToStringBuilder();
@@ -303,7 +385,90 @@ public class InDimFilter extends AbstractOptimizableDimFilter implements DimFilt
     return Objects.hash(values, dimension, extractionFn, filterTuning);
   }
 
-  private DruidLongPredicate createLongPredicate()
+  private byte[] computeCacheKey()
+  {
+    final List<String> sortedValues = new ArrayList<>(values);
+    sortedValues.sort(Comparator.nullsFirst(Ordering.natural()));
+    final Hasher hasher = Hashing.sha256().newHasher();
+    for (String v : sortedValues) {
+      if (v == null) {
+        hasher.putInt(0);
+      } else {
+        hasher.putString(v, StandardCharsets.UTF_8);
+      }
+    }
+    return new CacheKeyBuilder(DimFilterUtils.IN_CACHE_ID)
+        .appendString(dimension)
+        .appendByte(DimFilterUtils.STRING_SEPARATOR)
+        .appendByteArray(extractionFn == null ? new byte[0] : extractionFn.getCacheKey())
+        .appendByte(DimFilterUtils.STRING_SEPARATOR)
+        .appendByteArray(hasher.hash().asBytes())
+        .build();
+  }
+
+  private InDimFilter optimizeLookup()
+  {
+    if (extractionFn instanceof LookupExtractionFn
+        && ((LookupExtractionFn) extractionFn).isOptimize()) {
+      LookupExtractionFn exFn = (LookupExtractionFn) extractionFn;
+      LookupExtractor lookup = exFn.getLookup();
+
+      final Set<String> keys = new HashSet<>();
+      for (String value : values) {
+
+        // We cannot do an unapply()-based optimization if the selector value
+        // and the replaceMissingValuesWith value are the same, since we have to match on
+        // all values that are not present in the lookup.
+        final String convertedValue = NullHandling.emptyToNullIfNeeded(value);
+        if (!exFn.isRetainMissingValue() && Objects.equals(convertedValue, exFn.getReplaceMissingValueWith())) {
+          return this;
+        }
+        keys.addAll(lookup.unapply(convertedValue));
+
+        // If retainMissingValues is true and the selector value is not in the lookup map,
+        // there may be row values that match the selector value but are not included
+        // in the lookup map. Match on the selector value as well.
+        // If the selector value is overwritten in the lookup map, don't add selector value to keys.
+        if (exFn.isRetainMissingValue() && NullHandling.isNullOrEquivalent(lookup.apply(convertedValue))) {
+          keys.add(convertedValue);
+        }
+      }
+
+      if (keys.isEmpty()) {
+        return this;
+      } else {
+        return new InDimFilter(dimension, keys, null, filterTuning);
+      }
+    }
+    return this;
+  }
+
+  private static Iterable<ImmutableBitmap> getBitmapIterable(final Set<String> values, final BitmapIndex bitmapIndex)
+  {
+    return Filters.bitmapsFromIndexes(getBitmapIndexIterable(values, bitmapIndex), bitmapIndex);
+  }
+
+  private static IntIterable getBitmapIndexIterable(final Set<String> values, final BitmapIndex bitmapIndex)
+  {
+    return () -> new IntIterator()
+    {
+      final Iterator<String> iterator = values.iterator();
+
+      @Override
+      public boolean hasNext()
+      {
+        return iterator.hasNext();
+      }
+
+      @Override
+      public int nextInt()
+      {
+        return bitmapIndex.getIndex(iterator.next());
+      }
+    };
+  }
+
+  private static DruidLongPredicate createLongPredicate(final Set<String> values)
   {
     LongArrayList longs = new LongArrayList(values.size());
     for (String value : values) {
@@ -323,17 +488,7 @@ public class InDimFilter extends AbstractOptimizableDimFilter implements DimFilt
     }
   }
 
-  // As the set of filtered values can be large, parsing them as longs should be done only if needed, and only once.
-  // Pass in a common long predicate supplier to all filters created by .toFilter(), so that
-  // we only compute the long hashset/array once per query.
-  // This supplier must be thread-safe, since this DimFilter will be accessed in the query runners.
-  private Supplier<DruidLongPredicate> getLongPredicateSupplier()
-  {
-    Supplier<DruidLongPredicate> longPredicate = this::createLongPredicate;
-    return Suppliers.memoize(longPredicate);
-  }
-
-  private DruidFloatPredicate createFloatPredicate()
+  private static DruidFloatPredicate createFloatPredicate(final Set<String> values)
   {
     IntArrayList floatBits = new IntArrayList(values.size());
     for (String value : values) {
@@ -355,13 +510,7 @@ public class InDimFilter extends AbstractOptimizableDimFilter implements DimFilt
     }
   }
 
-  private Supplier<DruidFloatPredicate> getFloatPredicateSupplier()
-  {
-    Supplier<DruidFloatPredicate> floatPredicate = this::createFloatPredicate;
-    return Suppliers.memoize(floatPredicate);
-  }
-
-  private DruidDoublePredicate createDoublePredicate()
+  private static DruidDoublePredicate createDoublePredicate(final Set<String> values)
   {
     LongArrayList doubleBits = new LongArrayList(values.size());
     for (String value : values) {
@@ -383,9 +532,89 @@ public class InDimFilter extends AbstractOptimizableDimFilter implements DimFilt
     }
   }
 
-  private Supplier<DruidDoublePredicate> getDoublePredicateSupplier()
+  @VisibleForTesting
+  public static class InFilterDruidPredicateFactory implements DruidPredicateFactory
   {
-    Supplier<DruidDoublePredicate> doublePredicate = this::createDoublePredicate;
-    return Suppliers.memoize(doublePredicate);
+    private final ExtractionFn extractionFn;
+    private final Set<String> values;
+    private final Supplier<DruidLongPredicate> longPredicateSupplier;
+    private final Supplier<DruidFloatPredicate> floatPredicateSupplier;
+    private final Supplier<DruidDoublePredicate> doublePredicateSupplier;
+
+    InFilterDruidPredicateFactory(
+        final ExtractionFn extractionFn,
+        final Set<String> values
+    )
+    {
+      this.extractionFn = extractionFn;
+      this.values = values;
+
+      // As the set of filtered values can be large, parsing them as numbers should be done only if needed, and
+      // only once. Pass in a common long predicate supplier to all filters created by .toFilter(), so that we only
+      // compute the long hashset/array once per query. This supplier must be thread-safe, since this DimFilter will be
+      // accessed in the query runners.
+      this.longPredicateSupplier = Suppliers.memoize(() -> createLongPredicate(values));
+      this.floatPredicateSupplier = Suppliers.memoize(() -> createFloatPredicate(values));
+      this.doublePredicateSupplier = Suppliers.memoize(() -> createDoublePredicate(values));
+    }
+
+    @Override
+    public Predicate<String> makeStringPredicate()
+    {
+      if (extractionFn != null) {
+        return input -> values.contains(extractionFn.apply(input));
+      } else {
+        return values::contains;
+      }
+    }
+
+    @Override
+    public DruidLongPredicate makeLongPredicate()
+    {
+      if (extractionFn != null) {
+        return input -> values.contains(extractionFn.apply(input));
+      } else {
+        return longPredicateSupplier.get();
+      }
+    }
+
+    @Override
+    public DruidFloatPredicate makeFloatPredicate()
+    {
+      if (extractionFn != null) {
+        return input -> values.contains(extractionFn.apply(input));
+      } else {
+        return floatPredicateSupplier.get();
+      }
+    }
+
+    @Override
+    public DruidDoublePredicate makeDoublePredicate()
+    {
+      if (extractionFn != null) {
+        return input -> values.contains(extractionFn.apply(input));
+      }
+      return input -> doublePredicateSupplier.get().applyDouble(input);
+    }
+
+    @Override
+    public boolean equals(Object o)
+    {
+      if (this == o) {
+        return true;
+      }
+      if (o == null || getClass() != o.getClass()) {
+        return false;
+      }
+      InFilterDruidPredicateFactory that = (InFilterDruidPredicateFactory) o;
+      return Objects.equals(extractionFn, that.extractionFn) &&
+             Objects.equals(values, that.values);
+    }
+
+    @Override
+    public int hashCode()
+    {
+      return Objects.hash(extractionFn, values);
+    }
   }
 }
diff --git a/processing/src/main/java/org/apache/druid/segment/filter/Filters.java b/processing/src/main/java/org/apache/druid/segment/filter/Filters.java
index 399adb9..9cbecba 100644
--- a/processing/src/main/java/org/apache/druid/segment/filter/Filters.java
+++ b/processing/src/main/java/org/apache/druid/segment/filter/Filters.java
@@ -160,7 +160,7 @@ public class Filters
    *
    * @return an iterable of bitmaps
    */
-  static Iterable<ImmutableBitmap> bitmapsFromIndexes(final IntIterable indexes, final BitmapIndex bitmapIndex)
+  public static Iterable<ImmutableBitmap> bitmapsFromIndexes(final IntIterable indexes, final BitmapIndex bitmapIndex)
   {
     // Do not use Iterables.transform() to avoid boxing/unboxing integers.
     return new Iterable<ImmutableBitmap>()
@@ -404,7 +404,7 @@ public class Filters
     };
   }
 
-  static boolean supportsSelectivityEstimation(
+  public static boolean supportsSelectivityEstimation(
       Filter filter,
       String dimension,
       ColumnSelector columnSelector,
diff --git a/processing/src/main/java/org/apache/druid/segment/filter/InFilter.java b/processing/src/main/java/org/apache/druid/segment/filter/InFilter.java
deleted file mode 100644
index dfc4d71..0000000
--- a/processing/src/main/java/org/apache/druid/segment/filter/InFilter.java
+++ /dev/null
@@ -1,336 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.druid.segment.filter;
-
-import com.google.common.annotations.VisibleForTesting;
-import com.google.common.base.Predicate;
-import com.google.common.base.Supplier;
-import com.google.common.collect.ImmutableSet;
-import it.unimi.dsi.fastutil.ints.IntIterable;
-import it.unimi.dsi.fastutil.ints.IntIterator;
-import org.apache.druid.collections.bitmap.ImmutableBitmap;
-import org.apache.druid.java.util.common.IAE;
-import org.apache.druid.query.BitmapResultFactory;
-import org.apache.druid.query.extraction.ExtractionFn;
-import org.apache.druid.query.filter.BitmapIndexSelector;
-import org.apache.druid.query.filter.DruidDoublePredicate;
-import org.apache.druid.query.filter.DruidFloatPredicate;
-import org.apache.druid.query.filter.DruidLongPredicate;
-import org.apache.druid.query.filter.DruidPredicateFactory;
-import org.apache.druid.query.filter.Filter;
-import org.apache.druid.query.filter.FilterTuning;
-import org.apache.druid.query.filter.ValueMatcher;
-import org.apache.druid.query.filter.vector.VectorValueMatcher;
-import org.apache.druid.query.filter.vector.VectorValueMatcherColumnProcessorFactory;
-import org.apache.druid.segment.ColumnSelector;
-import org.apache.druid.segment.ColumnSelectorFactory;
-import org.apache.druid.segment.DimensionHandlerUtils;
-import org.apache.druid.segment.IntIteratorUtils;
-import org.apache.druid.segment.column.BitmapIndex;
-import org.apache.druid.segment.vector.VectorColumnSelectorFactory;
-
-import java.util.Iterator;
-import java.util.Map;
-import java.util.Objects;
-import java.util.Set;
-
-/**
- * The IN filter.
- * For single-valued dimension, this filter returns true if the dimension value matches to one of the
- * given {@link #values}.
- * For multi-valued dimension, this filter returns true if one of the dimension values matches to one of the
- * given {@link #values}.
- *
- * In SQL-compatible null handling mode, this filter is equivalent to {@code (dimension IN [values])} or
- * {@code (dimension IN [non-null values] OR dimension IS NULL)} when {@link #values} contains nulls.
- * In default null handling mode, this filter is equivalent to {@code (dimension IN [values])} or
- * {@code (dimension IN [non-null values, ''])} when {@link #values} contains nulls.
- */
-public class InFilter implements Filter
-{
-  private final String dimension;
-  private final Set<String> values;
-  private final ExtractionFn extractionFn;
-  private final FilterTuning filterTuning;
-  private final Supplier<DruidLongPredicate> longPredicateSupplier;
-  private final Supplier<DruidFloatPredicate> floatPredicateSupplier;
-  private final Supplier<DruidDoublePredicate> doublePredicateSupplier;
-
-  public InFilter(
-      String dimension,
-      Set<String> values,
-      Supplier<DruidLongPredicate> longPredicateSupplier,
-      Supplier<DruidFloatPredicate> floatPredicateSupplier,
-      Supplier<DruidDoublePredicate> doublePredicateSupplier,
-      ExtractionFn extractionFn,
-      FilterTuning filterTuning
-  )
-  {
-    this.dimension = dimension;
-    this.values = values;
-    this.extractionFn = extractionFn;
-    this.filterTuning = filterTuning;
-    this.longPredicateSupplier = longPredicateSupplier;
-    this.floatPredicateSupplier = floatPredicateSupplier;
-    this.doublePredicateSupplier = doublePredicateSupplier;
-  }
-
-  @Override
-  public <T> T getBitmapResult(BitmapIndexSelector selector, BitmapResultFactory<T> bitmapResultFactory)
-  {
-    if (extractionFn == null) {
-      final BitmapIndex bitmapIndex = selector.getBitmapIndex(dimension);
-      return bitmapResultFactory.unionDimensionValueBitmaps(getBitmapIterable(bitmapIndex));
-    } else {
-      return Filters.matchPredicate(
-          dimension,
-          selector,
-          bitmapResultFactory,
-          getPredicateFactory().makeStringPredicate()
-      );
-    }
-  }
-
-  @Override
-  public double estimateSelectivity(BitmapIndexSelector indexSelector)
-  {
-    if (extractionFn == null) {
-      final BitmapIndex bitmapIndex = indexSelector.getBitmapIndex(dimension);
-      return Filters.estimateSelectivity(
-          bitmapIndex,
-          IntIteratorUtils.toIntList(getBitmapIndexIterable(bitmapIndex).iterator()),
-          indexSelector.getNumRows()
-      );
-    } else {
-      return Filters.estimateSelectivity(
-          dimension,
-          indexSelector,
-          getPredicateFactory().makeStringPredicate()
-      );
-    }
-  }
-
-  private Iterable<ImmutableBitmap> getBitmapIterable(final BitmapIndex bitmapIndex)
-  {
-    return Filters.bitmapsFromIndexes(getBitmapIndexIterable(bitmapIndex), bitmapIndex);
-  }
-
-  private IntIterable getBitmapIndexIterable(final BitmapIndex bitmapIndex)
-  {
-    return () -> new IntIterator()
-    {
-      final Iterator<String> iterator = values.iterator();
-
-      @Override
-      public boolean hasNext()
-      {
-        return iterator.hasNext();
-      }
-
-      @Override
-      public int nextInt()
-      {
-        return bitmapIndex.getIndex(iterator.next());
-      }
-    };
-  }
-
-  @Override
-  public ValueMatcher makeMatcher(ColumnSelectorFactory factory)
-  {
-    return Filters.makeValueMatcher(factory, dimension, getPredicateFactory());
-  }
-
-  @Override
-  public VectorValueMatcher makeVectorMatcher(final VectorColumnSelectorFactory factory)
-  {
-    return DimensionHandlerUtils.makeVectorProcessor(
-        dimension,
-        VectorValueMatcherColumnProcessorFactory.instance(),
-        factory
-    ).makeMatcher(getPredicateFactory());
-  }
-
-  @Override
-  public boolean canVectorizeMatcher()
-  {
-    return true;
-  }
-
-  @Override
-  public Set<String> getRequiredColumns()
-  {
-    return ImmutableSet.of(dimension);
-  }
-
-  @Override
-  public boolean supportsRequiredColumnRewrite()
-  {
-    return true;
-  }
-
-  @Override
-  public Filter rewriteRequiredColumns(Map<String, String> columnRewrites)
-  {
-    String rewriteDimensionTo = columnRewrites.get(dimension);
-    if (rewriteDimensionTo == null) {
-      throw new IAE("Received a non-applicable rewrite: %s, filter's dimension: %s", columnRewrites, dimension);
-    }
-
-    return new InFilter(
-        rewriteDimensionTo,
-        values,
-        longPredicateSupplier,
-        floatPredicateSupplier,
-        doublePredicateSupplier,
-        extractionFn,
-        filterTuning
-    );
-  }
-
-  @Override
-  public boolean supportsBitmapIndex(BitmapIndexSelector selector)
-  {
-    return selector.getBitmapIndex(dimension) != null;
-  }
-
-  @Override
-  public boolean shouldUseBitmapIndex(BitmapIndexSelector selector)
-  {
-    return Filters.shouldUseBitmapIndex(this, selector, filterTuning);
-  }
-
-  @Override
-  public boolean supportsSelectivityEstimation(ColumnSelector columnSelector, BitmapIndexSelector indexSelector)
-  {
-    return Filters.supportsSelectivityEstimation(this, dimension, columnSelector, indexSelector);
-  }
-
-  private DruidPredicateFactory getPredicateFactory()
-  {
-    return new InFilterDruidPredicateFactory(extractionFn, values, longPredicateSupplier, floatPredicateSupplier, doublePredicateSupplier);
-  }
-
-  @Override
-  public boolean equals(Object o)
-  {
-    if (this == o) {
-      return true;
-    }
-    if (o == null || getClass() != o.getClass()) {
-      return false;
-    }
-    InFilter inFilter = (InFilter) o;
-    return Objects.equals(dimension, inFilter.dimension) &&
-           Objects.equals(values, inFilter.values) &&
-           Objects.equals(extractionFn, inFilter.extractionFn) &&
-           Objects.equals(filterTuning, inFilter.filterTuning);
-  }
-
-  @Override
-  public int hashCode()
-  {
-    return Objects.hash(dimension, values, extractionFn, filterTuning);
-  }
-
-  @VisibleForTesting
-  static class InFilterDruidPredicateFactory implements DruidPredicateFactory
-  {
-    private final ExtractionFn extractionFn;
-    private final Set<String> values;
-    private final Supplier<DruidLongPredicate> longPredicateSupplier;
-    private final Supplier<DruidFloatPredicate> floatPredicateSupplier;
-    private final Supplier<DruidDoublePredicate> doublePredicateSupplier;
-
-    InFilterDruidPredicateFactory(
-        ExtractionFn extractionFn,
-        Set<String> values,
-        Supplier<DruidLongPredicate> longPredicateSupplier,
-        Supplier<DruidFloatPredicate> floatPredicateSupplier,
-        Supplier<DruidDoublePredicate> doublePredicateSupplier
-    )
-    {
-      this.extractionFn = extractionFn;
-      this.values = values;
-      this.longPredicateSupplier = longPredicateSupplier;
-      this.floatPredicateSupplier = floatPredicateSupplier;
-      this.doublePredicateSupplier = doublePredicateSupplier;
-    }
-
-    @Override
-    public Predicate<String> makeStringPredicate()
-    {
-      if (extractionFn != null) {
-        return input -> values.contains(extractionFn.apply(input));
-      } else {
-        return input -> values.contains(input);
-      }
-    }
-
-    @Override
-    public DruidLongPredicate makeLongPredicate()
-    {
-      if (extractionFn != null) {
-        return input -> values.contains(extractionFn.apply(input));
-      } else {
-        return longPredicateSupplier.get();
-      }
-    }
-
-    @Override
-    public DruidFloatPredicate makeFloatPredicate()
-    {
-      if (extractionFn != null) {
-        return input -> values.contains(extractionFn.apply(input));
-      } else {
-        return floatPredicateSupplier.get();
-      }
-    }
-
-    @Override
-    public DruidDoublePredicate makeDoublePredicate()
-    {
-      if (extractionFn != null) {
-        return input -> values.contains(extractionFn.apply(input));
-      }
-      return input -> doublePredicateSupplier.get().applyDouble(input);
-    }
-
-    @Override
-    public boolean equals(Object o)
-    {
-      if (this == o) {
-        return true;
-      }
-      if (o == null || getClass() != o.getClass()) {
-        return false;
-      }
-      InFilterDruidPredicateFactory that = (InFilterDruidPredicateFactory) o;
-      return Objects.equals(extractionFn, that.extractionFn) &&
-             Objects.equals(values, that.values);
-    }
-
-    @Override
-    public int hashCode()
-    {
-      return Objects.hash(extractionFn, values);
-    }
-  }
-}
diff --git a/processing/src/main/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapter.java b/processing/src/main/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapter.java
index e23a2d4..b5bef65 100644
--- a/processing/src/main/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapter.java
+++ b/processing/src/main/java/org/apache/druid/segment/join/HashJoinSegmentStorageAdapter.java
@@ -224,8 +224,7 @@ public class HashJoinSegmentStorageAdapter implements StorageAdapter
     final JoinFilterPreAnalysisKey keyCached = joinFilterPreAnalysis.getKey();
 
     if (!keyIn.equals(keyCached)) {
-      // It is a bug if this happens. We expect the comparison to be quick, because in the sane case, identical objects
-      // will be used and therefore deep equality checks will be unnecessary.
+      // It is a bug if this happens. The implied key and the cached key should always match.
       throw new ISE("Pre-analysis mismatch, cannot execute query");
     }
 
diff --git a/processing/src/test/java/org/apache/druid/segment/filter/InFilterTest.java b/processing/src/test/java/org/apache/druid/segment/filter/InFilterTest.java
index 6a0db84..aac0e63 100644
--- a/processing/src/test/java/org/apache/druid/segment/filter/InFilterTest.java
+++ b/processing/src/test/java/org/apache/druid/segment/filter/InFilterTest.java
@@ -337,11 +337,13 @@ public class InFilterTest extends BaseFilterTest
     assertFilterMatches(toInFilterWithFn("dim1", lookupFn, "N/A"), ImmutableList.of());
     assertFilterMatches(toInFilterWithFn("dim2", lookupFn, "a"), ImmutableList.of());
     assertFilterMatches(toInFilterWithFn("dim2", lookupFn, "HELLO"), ImmutableList.of("a", "d"));
-    assertFilterMatches(toInFilterWithFn("dim2", lookupFn, "HELLO", "BYE", "UNKNOWN"),
-            ImmutableList.of("a", "b", "c", "d", "e", "f"));
+    assertFilterMatches(
+        toInFilterWithFn("dim2", lookupFn, "HELLO", "BYE", "UNKNOWN"),
+        ImmutableList.of("a", "b", "c", "d", "e", "f")
+    );
 
     final Map<String, String> stringMap2 = ImmutableMap.of(
-            "a", "e"
+        "a", "e"
     );
     LookupExtractor mapExtractor2 = new MapLookupExtractor(stringMap2, false);
     LookupExtractionFn lookupFn2 = new LookupExtractionFn(mapExtractor2, true, null, false, true);
@@ -350,8 +352,8 @@ public class InFilterTest extends BaseFilterTest
     assertFilterMatches(toInFilterWithFn("dim0", lookupFn2, "a"), ImmutableList.of());
 
     final Map<String, String> stringMap3 = ImmutableMap.of(
-            "c", "500",
-            "100", "e"
+        "c", "500",
+        "100", "e"
     );
     LookupExtractor mapExtractor3 = new MapLookupExtractor(stringMap3, false);
     LookupExtractionFn lookupFn3 = new LookupExtractionFn(mapExtractor3, false, null, false, true);
@@ -364,8 +366,8 @@ public class InFilterTest extends BaseFilterTest
   @Test
   public void testRequiredColumnRewrite()
   {
-    InFilter filter = (InFilter) toInFilter("dim0", "a", "c").toFilter();
-    InFilter filter2 = (InFilter) toInFilter("dim1", "a", "c").toFilter();
+    InDimFilter filter = (InDimFilter) toInFilter("dim0", "a", "c").toFilter();
+    InDimFilter filter2 = (InDimFilter) toInFilter("dim1", "a", "c").toFilter();
 
     Assert.assertTrue(filter.supportsRequiredColumnRewrite());
     Assert.assertTrue(filter2.supportsRequiredColumnRewrite());
@@ -381,17 +383,17 @@ public class InFilterTest extends BaseFilterTest
   @Test
   public void test_equals()
   {
-    EqualsVerifier.forClass(InFilter.class)
+    EqualsVerifier.forClass(InDimFilter.class)
                   .usingGetClass()
                   .withNonnullFields("dimension", "values")
-                  .withIgnoredFields("longPredicateSupplier", "floatPredicateSupplier", "doublePredicateSupplier")
+                  .withIgnoredFields("cacheKeySupplier", "predicateFactory", "cachedOptimizedFilter")
                   .verify();
   }
 
   @Test
   public void test_equals_forInFilterDruidPredicateFactory()
   {
-    EqualsVerifier.forClass(InFilter.InFilterDruidPredicateFactory.class)
+    EqualsVerifier.forClass(InDimFilter.InFilterDruidPredicateFactory.class)
                   .usingGetClass()
                   .withNonnullFields("values")
                   .withIgnoredFields("longPredicateSupplier", "floatPredicateSupplier", "doublePredicateSupplier")


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@druid.apache.org
For additional commands, e-mail: commits-help@druid.apache.org