You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ho...@apache.org on 2022/04/28 01:14:12 UTC

[lucene] branch main updated: LUCENE-10292: Suggest: Fix FreeTextSuggester so that getCount() returned results consistent with lookup() during concurrent build()

This is an automated email from the ASF dual-hosted git repository.

hossman pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/lucene.git


The following commit(s) were added to refs/heads/main by this push:
     new a8d86ea6e8b LUCENE-10292: Suggest: Fix FreeTextSuggester so that getCount() returned results consistent with lookup() during concurrent build()
a8d86ea6e8b is described below

commit a8d86ea6e8b89ea0324e7f8b6e1d5213254274d5
Author: Chris Hostetter <ho...@apache.org>
AuthorDate: Wed Apr 27 18:14:01 2022 -0700

    LUCENE-10292: Suggest: Fix FreeTextSuggester so that getCount() returned results consistent with lookup() during concurrent build()
    
    Fix SuggestRebuildTestUtil to reliably surfice this kind of failure that was previously sporadic
---
 .../suggest/analyzing/FreeTextSuggester.java       | 13 +++--
 .../search/suggest/SuggestRebuildTestUtil.java     | 68 +++++++++++++++-------
 2 files changed, 55 insertions(+), 26 deletions(-)

diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java
index 4e5ff17c9ab..7e147c8bfe3 100644
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java
@@ -140,7 +140,7 @@ public class FreeTextSuggester extends Lookup {
   private final byte separator;
 
   /** Number of entries the lookup was built with */
-  private long count = 0;
+  private volatile long count = 0;
 
   /**
    * The default character used to join multiple tokens into a single ngram token. The input tokens
@@ -273,7 +273,7 @@ public class FreeTextSuggester extends Lookup {
     IndexReader reader = null;
 
     boolean success = false;
-    count = 0;
+    long newCount = 0;
     try {
       while (true) {
         BytesRef surfaceForm = iterator.next();
@@ -282,7 +282,7 @@ public class FreeTextSuggester extends Lookup {
         }
         field.setStringValue(surfaceForm.utf8ToString());
         writer.addDocument(doc);
-        count++;
+        newCount++;
       }
       reader = DirectoryReader.open(writer);
 
@@ -320,10 +320,13 @@ public class FreeTextSuggester extends Lookup {
         fstCompiler.add(Util.toIntsRef(term, scratchInts), encodeWeight(termsEnum.totalTermFreq()));
       }
 
-      fst = fstCompiler.compile();
-      if (fst == null) {
+      final FST<Long> newFst = fstCompiler.compile();
+      if (newFst == null) {
         throw new IllegalArgumentException("need at least one suggestion");
       }
+      fst = newFst;
+      count = newCount;
+
       // System.out.println("FST: " + fst.getNodeCount() + " nodes");
 
       /*
diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/SuggestRebuildTestUtil.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/SuggestRebuildTestUtil.java
index 14b74eeeab7..badb6cb0410 100644
--- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/SuggestRebuildTestUtil.java
+++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/SuggestRebuildTestUtil.java
@@ -18,11 +18,13 @@ package org.apache.lucene.search.suggest;
 
 import static org.junit.Assert.assertNull;
 
+import java.io.IOException;
 import java.util.ArrayList;
-import java.util.Iterator;
 import java.util.List;
+import java.util.Set;
 import java.util.concurrent.Semaphore;
 import java.util.concurrent.atomic.AtomicReference;
+import org.apache.lucene.util.BytesRef;
 
 /** Reusable Logic for confirming that Lookup impls can return suggestions during a 'rebuild' */
 public final class SuggestRebuildTestUtil {
@@ -57,25 +59,29 @@ public final class SuggestRebuildTestUtil {
     // modify source data we're going to build from, and spin up background thread that
     // will rebuild (slowly)
     data.addAll(extraData);
-    final Semaphore rebuildGate = new Semaphore(0);
+    final Semaphore readyToCheck = new Semaphore(0);
+    final Semaphore readyToAdvance = new Semaphore(0);
     final AtomicReference<Throwable> buildError = new AtomicReference<>();
     final Thread rebuilder =
         new Thread(
             () -> {
               try {
                 suggester.build(
-                    new InputArrayIterator(new DelayedIterator<>(rebuildGate, data.iterator())));
+                    new DelayedInputIterator(
+                        readyToCheck, readyToAdvance, new InputArrayIterator(data.iterator())));
               } catch (Throwable t) {
                 buildError.set(t);
               }
             });
     rebuilder.start();
     // at every stage of the slow rebuild, we should still be able to get our original suggestions
-    for (int i = 0; i < data.size(); i++) {
+    // (+1 iteration to ensure final next() call can return null)
+    for (int i = 0; i < data.size() + 1; i++) {
+      readyToCheck.acquire();
       initialChecks.check(suggester);
-      rebuildGate.release();
+      readyToAdvance.release();
     }
-    // once all the data is releasedfrom the iterator, the background rebuild should finish, and
+    // once all the data is released from the iterator, the background rebuild should finish, and
     // suggest results
     // should change
     rebuilder.join();
@@ -92,34 +98,54 @@ public final class SuggestRebuildTestUtil {
   }
 
   /**
-   * An iterator wrapper whose {@link Iterator#next} method will only return when a Semaphore permit
-   * is acquirable
+   * An InputArrayIterator wrapper whose {@link InputIterator#next} method releases on a Semaphore,
+   * and then acquires from a differnet Semaphore.
    */
-  private static final class DelayedIterator<E> implements Iterator<E> {
-    final Iterator<E> inner;
-    final Semaphore gate;
+  private static final class DelayedInputIterator implements InputIterator {
+    final Semaphore releaseOnNext;
+    final Semaphore acquireOnNext;
+    final InputIterator inner;
 
-    public DelayedIterator(final Semaphore gate, final Iterator<E> inner) {
-      assert null != gate;
+    public DelayedInputIterator(
+        final Semaphore releaseOnNext, final Semaphore acquireOnNext, final InputIterator inner) {
+      assert null != releaseOnNext;
+      assert null != acquireOnNext;
       assert null != inner;
-      this.gate = gate;
+      this.releaseOnNext = releaseOnNext;
+      this.acquireOnNext = acquireOnNext;
       this.inner = inner;
     }
 
     @Override
-    public boolean hasNext() {
-      return inner.hasNext();
+    public BytesRef next() throws IOException {
+      releaseOnNext.release();
+      acquireOnNext.acquireUninterruptibly();
+      return inner.next();
     }
 
     @Override
-    public E next() {
-      gate.acquireUninterruptibly();
-      return inner.next();
+    public long weight() {
+      return inner.weight();
+    }
+
+    @Override
+    public BytesRef payload() {
+      return inner.payload();
+    }
+
+    @Override
+    public boolean hasPayloads() {
+      return inner.hasPayloads();
+    }
+
+    @Override
+    public Set<BytesRef> contexts() {
+      return inner.contexts();
     }
 
     @Override
-    public void remove() {
-      inner.remove();
+    public boolean hasContexts() {
+      return inner.hasContexts();
     }
   }
 }