You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by dw...@apache.org on 2021/05/18 06:49:59 UTC

[lucene] branch main updated: LUCENE-9960: Avoid unnecessary top element replacement for equal elements in PriorityQueue. (#141)

This is an automated email from the ASF dual-hosted git repository.

dweiss pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git


The following commit(s) were added to refs/heads/main by this push:
     new ba9fee5  LUCENE-9960: Avoid unnecessary top element replacement for equal elements in PriorityQueue. (#141)
ba9fee5 is described below

commit ba9fee502b0c18fda312da55c6304ac8a463f509
Author: Dawid Weiss <da...@carrotsearch.com>
AuthorDate: Tue May 18 08:49:53 2021 +0200

    LUCENE-9960: Avoid unnecessary top element replacement for equal elements in PriorityQueue. (#141)
---
 lucene/CHANGES.txt                                 |  2 +
 .../java/org/apache/lucene/util/PriorityQueue.java | 19 ++++---
 .../org/apache/lucene/util/TestPriorityQueue.java  | 61 +++++++++++++++++-----
 3 files changed, 61 insertions(+), 21 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 1a356fa..d852f08 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -118,6 +118,8 @@ API Changes
 
 Improvements
 
+* LUCENE-9960: Avoid unnecessary top element replacement for equal elements in PriorityQueue. (Dawid Weiss)
+
 * LUCENE-9687: Hunspell support improvements: add API for spell-checking and suggestions, support compound words,
   fix various behavior differences between Java and C++ implementations, improve performance (Peter Gromov, Dawid Weiss)
 
diff --git a/lucene/core/src/java/org/apache/lucene/util/PriorityQueue.java b/lucene/core/src/java/org/apache/lucene/util/PriorityQueue.java
index a674225..fbd06ab 100644
--- a/lucene/core/src/java/org/apache/lucene/util/PriorityQueue.java
+++ b/lucene/core/src/java/org/apache/lucene/util/PriorityQueue.java
@@ -21,7 +21,7 @@ import java.util.NoSuchElementException;
 import java.util.function.Supplier;
 
 /**
- * A PriorityQueue maintains a partial ordering of its elements such that the least element can
+ * A priority queue maintains a partial ordering of its elements such that the least element can
  * always be found in constant time. Put()'s and pop()'s require log(size) time but the remove()
  * cost implemented here is linear.
  *
@@ -74,11 +74,11 @@ public abstract class PriorityQueue<T> implements Iterable<T> {
    */
   public PriorityQueue(int maxSize, Supplier<T> sentinelObjectSupplier) {
     final int heapSize;
+
     if (0 == maxSize) {
       // We allocate 1 extra to avoid if statement in top()
       heapSize = 2;
     } else {
-
       if ((maxSize < 0) || (maxSize >= ArrayUtil.MAX_ARRAY_LENGTH)) {
         // Throw exception to prevent confusing OOME:
         throw new IllegalArgumentException(
@@ -89,7 +89,8 @@ public abstract class PriorityQueue<T> implements Iterable<T> {
       // 1-based not 0-based.  heap[0] is unused.
       heapSize = maxSize + 1;
     }
-    // T is unbounded type, so this unchecked cast works always:
+
+    // T is an unbounded type, so this unchecked cast works always.
     @SuppressWarnings("unchecked")
     final T[] h = (T[]) new Object[heapSize];
     this.heap = h;
@@ -121,9 +122,11 @@ public abstract class PriorityQueue<T> implements Iterable<T> {
    * @return the new 'top' element in the queue.
    */
   public final T add(T element) {
-    size++;
-    heap[size] = element;
-    upHeap(size);
+    // don't modify size until we know heap access didn't throw AIOOB.
+    int index = size + 1;
+    heap[index] = element;
+    size = index;
+    upHeap(index);
     return heap[1];
   }
 
@@ -138,7 +141,7 @@ public abstract class PriorityQueue<T> implements Iterable<T> {
     if (size < maxSize) {
       add(element);
       return null;
-    } else if (size > 0 && !lessThan(element, heap[1])) {
+    } else if (size > 0 && lessThan(heap[1], element)) {
       T ret = heap[1];
       heap[1] = element;
       updateTop();
@@ -273,7 +276,7 @@ public abstract class PriorityQueue<T> implements Iterable<T> {
    * @lucene.internal
    */
   protected final Object[] getHeapArray() {
-    return (Object[]) heap;
+    return heap;
   }
 
   @Override
diff --git a/lucene/core/src/test/org/apache/lucene/util/TestPriorityQueue.java b/lucene/core/src/test/org/apache/lucene/util/TestPriorityQueue.java
index 90fb087..b580d14 100644
--- a/lucene/core/src/test/org/apache/lucene/util/TestPriorityQueue.java
+++ b/lucene/core/src/test/org/apache/lucene/util/TestPriorityQueue.java
@@ -21,6 +21,8 @@ import java.util.Iterator;
 import java.util.List;
 import java.util.NoSuchElementException;
 import java.util.Random;
+import org.hamcrest.MatcherAssert;
+import org.hamcrest.Matchers;
 
 public class TestPriorityQueue extends LuceneTestCase {
 
@@ -47,6 +49,50 @@ public class TestPriorityQueue extends LuceneTestCase {
     }
   }
 
+  public void testZeroSizedQueue() {
+    PriorityQueue<Integer> pq = new IntegerQueue(0);
+    assertEquals((Object) 1, pq.insertWithOverflow(1));
+    assertEquals(0, pq.size());
+
+    // should fail, but passes and modifies the top...
+    pq.add(1);
+    assertEquals((Object) 1, pq.top());
+  }
+
+  public void testNoExtraWorkOnEqualElements() {
+    class Value {
+      private final int index;
+      private final int value;
+
+      Value(int index, int value) {
+        this.index = index;
+        this.value = value;
+      }
+    }
+
+    PriorityQueue<Value> pq =
+        new PriorityQueue<>(5) {
+          @Override
+          protected boolean lessThan(Value a, Value b) {
+            return a.value < b.value;
+          }
+        };
+
+    // Make all elements equal but record insertion order.
+    for (int i = 0; i < 100; i++) {
+      pq.insertWithOverflow(new Value(i, 0));
+    }
+
+    ArrayList<Integer> indexes = new ArrayList<>();
+    for (Value e : pq) {
+      indexes.add(e.index);
+    }
+
+    // All elements are "equal" so we should have exactly the indexes of those elements that were
+    // added first.
+    MatcherAssert.assertThat(indexes, Matchers.containsInAnyOrder(0, 1, 2, 3, 4));
+  }
+
   public void testPQ() throws Exception {
     testPQ(atLeast(10000), random());
   }
@@ -61,13 +107,6 @@ public class TestPriorityQueue extends LuceneTestCase {
       pq.add(next);
     }
 
-    //      Date end = new Date();
-
-    //      System.out.print(((float)(end.getTime()-start.getTime()) / count) * 1000);
-    //      System.out.println(" microseconds/put");
-
-    //      start = new Date();
-
     int last = Integer.MIN_VALUE;
     for (int i = 0; i < count; i++) {
       Integer next = pq.pop();
@@ -77,10 +116,6 @@ public class TestPriorityQueue extends LuceneTestCase {
     }
 
     assertEquals(sum, sum2);
-    //      end = new Date();
-
-    //      System.out.print(((float)(end.getTime()-start.getTime()) / count) * 1000);
-    //      System.out.println(" microseconds/pop");
   }
 
   public void testClear() {
@@ -141,7 +176,7 @@ public class TestPriorityQueue extends LuceneTestCase {
       if (evicted != null) {
         assertTrue(sds.remove(evicted));
         if (evicted != newEntry) {
-          assertTrue(evicted == lastLeast);
+          assertSame(evicted, lastLeast);
         }
       }
       Integer newLeast = pq.top();
@@ -159,7 +194,7 @@ public class TestPriorityQueue extends LuceneTestCase {
     for (int p = 0; p < 500000; p++) {
       int element = (int) (random.nextFloat() * (sds.size() - 1));
       Integer objectToRemove = sds.get(element);
-      assertTrue(sds.remove(element) == objectToRemove);
+      assertSame(sds.remove(element), objectToRemove);
       assertTrue(pq.remove(objectToRemove));
       pq.checkValidity();
       Integer newEntry = Math.abs(random.nextInt());