You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by gs...@apache.org on 2021/11/21 15:37:01 UTC

[lucene] branch main updated: LUCENE-10244: Please consider opening MultiCollector::getCollectors for public use (#455)

This is an automated email from the ASF dual-hosted git repository.

gsmiller pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git


The following commit(s) were added to refs/heads/main by this push:
     new 5993b90  LUCENE-10244: Please consider opening MultiCollector::getCollectors for public use (#455)
5993b90 is described below

commit 5993b9050a21eb01e2a4c964605ee44364eace52
Author: Andriy Redko <an...@aiven.io>
AuthorDate: Sun Nov 21 10:36:54 2021 -0500

    LUCENE-10244: Please consider opening MultiCollector::getCollectors for public use (#455)
---
 .../org/apache/lucene/search/MultiCollector.java   |   2 +-
 .../apache/lucene/search/TestCollectorManager.java | 181 +++++++++++++++++++++
 2 files changed, 182 insertions(+), 1 deletion(-)

diff --git a/lucene/core/src/java/org/apache/lucene/search/MultiCollector.java b/lucene/core/src/java/org/apache/lucene/search/MultiCollector.java
index a22afa3..09aea3a 100644
--- a/lucene/core/src/java/org/apache/lucene/search/MultiCollector.java
+++ b/lucene/core/src/java/org/apache/lucene/search/MultiCollector.java
@@ -150,7 +150,7 @@ public class MultiCollector implements Collector {
   }
 
   /** Provides access to the wrapped {@code Collector}s for advanced use-cases */
-  Collector[] getCollectors() {
+  public Collector[] getCollectors() {
     return collectors;
   }
 
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestCollectorManager.java b/lucene/core/src/test/org/apache/lucene/search/TestCollectorManager.java
new file mode 100644
index 0000000..749888f
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/search/TestCollectorManager.java
@@ -0,0 +1,181 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search;
+
+import static org.hamcrest.CoreMatchers.instanceOf;
+
+import com.carrotsearch.randomizedtesting.generators.RandomNumbers;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Random;
+import java.util.Set;
+import java.util.function.Predicate;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.LuceneTestCase;
+
+public class TestCollectorManager extends LuceneTestCase {
+
+  @SuppressWarnings("unchecked")
+  public void testCollection() throws IOException {
+    Directory dir = newDirectory();
+    DirectoryReader reader = reader(dir);
+    LeafReaderContext ctx = reader.leaves().get(0);
+
+    // Set up two collectors: one that only collects even doc ids and one that only collects
+    // odd ones (negative odd ids match neither, since -3 % 2 == -1). Create some random doc ids
+    // and keep track of the ones that we expect each collector to collect:
+    Predicate<Integer> evenPredicate = val -> val % 2 == 0;
+    Predicate<Integer> oddPredicate = val -> val % 2 == 1;
+
+    CompositeCollectorManager cm =
+        new CompositeCollectorManager(Arrays.asList(evenPredicate, oddPredicate));
+
+    for (int iter = 0; iter < 100; iter++) {
+      int docs = RandomNumbers.randomIntBetween(random(), 1000, 10000);
+      List<Integer> expected = generateDocIds(docs, random());
+      IntStream expectedEven = expected.stream().filter(evenPredicate).mapToInt(i -> i);
+      IntStream expectedOdd = expected.stream().filter(oddPredicate).mapToInt(i -> i);
+
+      // Collect with the composite manager, which wraps both the even and the odd collector:
+      Object result = collectAll(ctx, expected, cm);
+      assertThat(result, instanceOf(List.class));
+      IntStream intResults = ((List<Integer>) result).stream().mapToInt(i -> i);
+      assertArrayEquals(
+          IntStream.concat(expectedEven, expectedOdd).sorted().toArray(),
+          intResults.sorted().toArray());
+    }
+
+    reader.close();
+    dir.close();
+  }
+
+  public void testEmptyCollectors() {
+    assertThrows(
+        IllegalArgumentException.class,
+        () -> new CompositeCollectorManager(Collections.emptyList()).newCollector());
+  }
+
+  private static DirectoryReader reader(Directory dir) throws IOException {
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
+    iw.addDocument(new Document());
+    iw.commit();
+    DirectoryReader reader = iw.getReader();
+    iw.close();
+
+    return reader;
+  }
+
+  private static <C extends Collector> Object collectAll(
+      LeafReaderContext ctx, List<Integer> values, CollectorManager<C, ?> collectorManager)
+      throws IOException {
+    List<C> collectors = new ArrayList<>();
+    C collector = collectorManager.newCollector();
+    collectors.add(collector);
+    LeafCollector leafCollector = collector.getLeafCollector(ctx);
+    for (Integer v : values) {
+      if (random().nextInt(10) == 1) {
+        collector = collectorManager.newCollector();
+        collectors.add(collector);
+        leafCollector = collector.getLeafCollector(ctx);
+      }
+      leafCollector.collect(v);
+    }
+    return collectorManager.reduce(collectors);
+  }
+
+  /**
+   * Generate test doc ids. This will de-dupe and create a sorted list to be more realistic with
+   * real-world use-cases. Note that it's possible this will generate fewer than 'count' entries
+   * because of de-duping, but that should be quite rare and probably isn't worth worrying about for
+   * these testing purposes.
+   */
+  private List<Integer> generateDocIds(int count, Random random) {
+    Set<Integer> generated = new HashSet<>(count);
+    for (int i = 0; i < count; i++) {
+      generated.add(random.nextInt());
+    }
+
+    return generated.stream().sorted().collect(Collectors.toList());
+  }
+
+  private static final class CompositeCollectorManager
+      implements CollectorManager<Collector, List<Integer>> {
+    private final List<Predicate<Integer>> predicates;
+
+    CompositeCollectorManager(List<Predicate<Integer>> predicates) {
+      this.predicates = predicates;
+    }
+
+    @Override
+    public Collector newCollector() throws IOException {
+      return MultiCollector.wrap(
+          predicates.stream().map(SimpleListCollector::new).toArray(Collector[]::new));
+    }
+
+    @Override
+    public List<Integer> reduce(Collection<Collector> collectors) throws IOException {
+      List<Integer> all = new ArrayList<>();
+      for (Collector m : collectors) {
+        for (Collector c : ((MultiCollector) m).getCollectors()) {
+          all.addAll(((SimpleListCollector) c).collected);
+        }
+      }
+
+      return all;
+    }
+  }
+
+  private static final class SimpleListCollector implements Collector {
+    final Predicate<Integer> predicate;
+    final List<Integer> collected = new ArrayList<>();
+
+    SimpleListCollector(Predicate<Integer> predicate) {
+      this.predicate = predicate;
+    }
+
+    @Override
+    public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
+      return new LeafCollector() {
+        @Override
+        public void setScorer(Scorable scorer) throws IOException {}
+
+        @Override
+        public void collect(int doc) throws IOException {
+          if (predicate.test(doc)) {
+            collected.add(doc);
+          }
+        }
+      };
+    }
+
+    @Override
+    public ScoreMode scoreMode() {
+      return ScoreMode.COMPLETE;
+    }
+  }
+}