You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ja...@apache.org on 2023/05/26 11:44:46 UTC

[lucene] branch main updated: Optimize ConjunctionDISI.createConjunction (#12328)

This is an automated email from the ASF dual-hosted git repository.

javanna pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git


The following commit(s) were added to refs/heads/main by this push:
     new fd758073506 Optimize ConjunctionDISI.createConjunction (#12328)
fd758073506 is described below

commit fd75807350604fbff5d571ec9b31e3669d17059e
Author: Armin Braun <me...@obrown.io>
AuthorDate: Fri May 26 13:44:39 2023 +0200

    Optimize ConjunctionDISI.createConjunction (#12328)
    
    This method is showing up as a little hot when profiling some queries.
    Almost all the time spent in this method is just burnt on ceremony
    around stream indirections that don't inline.
    Moving this to iterators, simplifying the check for same doc id and also saving one iteration (for the min
    cost) makes this method far cheaper and easier to read.
---
 lucene/CHANGES.txt                                 |  2 +
 .../org/apache/lucene/search/ConjunctionDISI.java  | 47 +++++++++-------------
 2 files changed, 22 insertions(+), 27 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 453c3695a48..faf85efa31a 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -76,6 +76,8 @@ Optimizations
 
 * GITHUB#11857, GITHUB#11859, GITHUB#11893, GITHUB#11909: Hunspell: improved suggestion performance (Peter Gromov)
 
+* GITHUB#12328: Optimize ConjunctionDISI.createConjunction (Armin Braun)
+
 Bug Fixes
 ---------------------
 
diff --git a/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java b/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java
index b70224f1ec5..4fc74b4f087 100644
--- a/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java
+++ b/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java
@@ -20,7 +20,6 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
-import java.util.Comparator;
 import java.util.List;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BitSet;
@@ -99,24 +98,26 @@ final class ConjunctionDISI extends DocIdSetIterator {
         allIterators.size() > 0
             ? allIterators.get(0).docID()
             : twoPhaseIterators.get(0).approximation.docID();
-    boolean iteratorsOnTheSameDoc = allIterators.stream().allMatch(it -> it.docID() == curDoc);
-    iteratorsOnTheSameDoc =
-        iteratorsOnTheSameDoc
-            && twoPhaseIterators.stream().allMatch(it -> it.approximation().docID() == curDoc);
-    if (iteratorsOnTheSameDoc == false) {
-      throw new IllegalArgumentException(
-          "Sub-iterators of ConjunctionDISI are not on the same document!");
+    long minCost = Long.MAX_VALUE;
+    for (DocIdSetIterator allIterator : allIterators) {
+      if (allIterator.docID() != curDoc) {
+        throwSubIteratorsNotOnSameDocument();
+      }
+      minCost = Math.min(allIterator.cost(), minCost);
+    }
+    for (TwoPhaseIterator it : twoPhaseIterators) {
+      if (it.approximation().docID() != curDoc) {
+        throwSubIteratorsNotOnSameDocument();
+      }
     }
-
-    long minCost = allIterators.stream().mapToLong(DocIdSetIterator::cost).min().getAsLong();
     List<BitSetIterator> bitSetIterators = new ArrayList<>();
     List<DocIdSetIterator> iterators = new ArrayList<>();
     for (DocIdSetIterator iterator : allIterators) {
-      if (iterator.cost() > minCost && iterator instanceof BitSetIterator) {
+      if (iterator instanceof BitSetIterator bitSetIterator && bitSetIterator.cost() > minCost) {
         // we put all bitset iterators into bitSetIterators
         // except if they have the minimum cost, since we need
         // them to lead the iteration in that case
-        bitSetIterators.add((BitSetIterator) iterator);
+        bitSetIterators.add(bitSetIterator);
       } else {
         iterators.add(iterator);
       }
@@ -142,6 +143,11 @@ final class ConjunctionDISI extends DocIdSetIterator {
     return disi;
   }
 
+  private static void throwSubIteratorsNotOnSameDocument() {
+    throw new IllegalArgumentException(
+        "Sub-iterators of ConjunctionDISI are not on the same document!");
+  }
+
   final DocIdSetIterator lead1, lead2;
   final DocIdSetIterator[] others;
 
@@ -150,14 +156,7 @@ final class ConjunctionDISI extends DocIdSetIterator {
 
     // Sort the array the first time to allow the least frequent DocsEnum to
     // lead the matching.
-    CollectionUtil.timSort(
-        iterators,
-        new Comparator<DocIdSetIterator>() {
-          @Override
-          public int compare(DocIdSetIterator o1, DocIdSetIterator o2) {
-            return Long.compare(o1.cost(), o2.cost());
-          }
-        });
+    CollectionUtil.timSort(iterators, (o1, o2) -> Long.compare(o1.cost(), o2.cost()));
     lead1 = iterators.get(0);
     lead2 = iterators.get(1);
     others = iterators.subList(2, iterators.size()).toArray(new DocIdSetIterator[0]);
@@ -326,13 +325,7 @@ final class ConjunctionDISI extends DocIdSetIterator {
       assert twoPhaseIterators.size() > 0;
 
       CollectionUtil.timSort(
-          twoPhaseIterators,
-          new Comparator<TwoPhaseIterator>() {
-            @Override
-            public int compare(TwoPhaseIterator o1, TwoPhaseIterator o2) {
-              return Float.compare(o1.matchCost(), o2.matchCost());
-            }
-          });
+          twoPhaseIterators, (o1, o2) -> Float.compare(o1.matchCost(), o2.matchCost()));
 
       this.twoPhaseIterators =
           twoPhaseIterators.toArray(new TwoPhaseIterator[twoPhaseIterators.size()]);