You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@lucene.apache.org by GitBox <gi...@apache.org> on 2020/03/18 09:56:41 UTC
[GitHub] [lucene-solr] jimczi commented on a change in pull request #1316: LUCENE-8929 parallel early termination in TopFieldCollector using minmin score

jimczi commented on a change in pull request #1316: LUCENE-8929 parallel early termination in TopFieldCollector using minmin score
URL: https://github.com/apache/lucene-solr/pull/1316#discussion_r394215894
 
 

 ##########
 File path: lucene/core/src/java/org/apache/lucene/search/ParallelSortedCollector.java
 ##########
 @@ -0,0 +1,612 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.PriorityQueue;
+
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.search.FieldValueHitQueue.Entry;
+import org.apache.lucene.search.TotalHits.Relation;
+
+/**
+ * A {@link Collector} for results sorted by field, optimized for early termination in
+ * the case where the {@link Sort} matches the index and the search is executed in parallel,
+ * using multiple threads.
+ *
+ * @lucene.experimental
+ */
+abstract class ParallelSortedCollector extends TopDocsCollector<Entry> {
+
+  private static final ScoreDoc[] EMPTY_SCOREDOCS = new ScoreDoc[0];
+
+  final int numHits;
+  final Sort sort;
+  final HitsThresholdChecker hitsThresholdChecker;
+  final FieldComparator<?> firstComparator;
+
+  // the current local minimum competitive score already propagated to the underlying scorer
+  float minCompetitiveScore;
+
+  // Enables global early termination with concurrent threads using minimum competitive scores and
+  // collected counts of all segments
+  final MaxScoreTerminator maxScoreTerminator;
+
+  final int numComparators;
+  FieldValueHitQueue.Entry bottom = null;
+  boolean queueFull;
+  int docBase;
+  final boolean needsScores;
+  final ScoreMode scoreMode;
+
+  // Declaring the constructor private prevents extending this class by anyone
+  // else. Note that the class cannot be final since it's extended by the
+  // internal versions. If someone will define a constructor with any other
+  // visibility, then anyone will be able to extend the class, which is not what
+  // we want.
+  private ParallelSortedCollector(FieldValueHitQueue<Entry> pq, int numHits, Sort sort,
+                                  HitsThresholdChecker hitsThresholdChecker, boolean needsScores,
+                                  MaxScoreTerminator maxScoreTerminator) {
+    super(pq);
+    this.needsScores = needsScores;
+    this.numHits = numHits;
+    this.sort = sort;
+    this.hitsThresholdChecker = hitsThresholdChecker;
+    this.maxScoreTerminator = maxScoreTerminator;
+    numComparators = pq.getComparators().length;
+    firstComparator = pq.getComparators()[0];
+    scoreMode = needsScores ? ScoreMode.COMPLETE : ScoreMode.COMPLETE_NO_SCORES;
+  }
+
+  private abstract class TopFieldLeafCollector implements LeafCollector {
+
+    final LeafFieldComparator comparator;
+    final int firstReverseMul;
+    final int reverseMul;
+    final LeafReaderContext context;
+    final MaxScoreTerminator.LeafState leafTerminationState;
+
+    private double score;
+    Scorable scorer;
+
+    TopFieldLeafCollector(FieldValueHitQueue<Entry> queue, LeafReaderContext context) throws IOException {
+      LeafFieldComparator[] comparators = queue.getComparators(context);
+      firstReverseMul = queue.reverseMul[0];
+      if (comparators.length == 1) {
+        this.reverseMul = queue.reverseMul[0];
+        this.comparator = comparators[0];
+      } else {
+        this.reverseMul = 1;
+        this.comparator = new MultiLeafFieldComparator(comparators, queue.reverseMul);
+      }
+      this.context = context;
+      leafTerminationState = maxScoreTerminator.addLeafState();
+    }
+
+    void countHit() {
+      ++totalHits;
+      // TODO: replace hitsThresholdChecker with something simpler
+      hitsThresholdChecker.incrementHitCount();
+    }
+
+    void collectHitIfCompetitive(int doc) throws IOException {
+      if (reverseMul * comparator.compareBottom(doc) > 0) {
 
 Review comment:
   if we specialize for parallel sorted  index we should get ride of this comparison ? A linked list should also be enough to keep track of the top N  per leave ?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@lucene.apache.org
For additional commands, e-mail: issues-help@lucene.apache.org