You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2015/04/10 19:53:18 UTC

svn commit: r1672713 - in /lucene/dev/branches/branch_5x: ./ lucene/ lucene/core/ lucene/core/src/java/org/apache/lucene/index/ lucene/core/src/test/org/apache/lucene/index/

Author: mikemccand
Date: Fri Apr 10 17:53:17 2015
New Revision: 1672713

URL: http://svn.apache.org/r1672713
Log:
LUCENE-6315: simplify the private iterator Lucene uses when resolving deleted terms -> docids

Modified:
    lucene/dev/branches/branch_5x/   (props changed)
    lucene/dev/branches/branch_5x/lucene/   (props changed)
    lucene/dev/branches/branch_5x/lucene/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/branch_5x/lucene/core/   (props changed)
    lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/BufferedUpdatesStream.java
    lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/FieldTermIterator.java
    lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/MergedPrefixCodedTermsIterator.java
    lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/PrefixCodedTerms.java
    lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/index/TestDocumentsWriterDeleteQueue.java
    lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/index/TestPrefixCodedTerms.java

Modified: lucene/dev/branches/branch_5x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/CHANGES.txt?rev=1672713&r1=1672712&r2=1672713&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_5x/lucene/CHANGES.txt Fri Apr 10 17:53:17 2015
@@ -55,6 +55,10 @@ Other
 
 * LUCENE-5439: Add 'ant jacoco' build target. (Robert Muir)
 
+* LUCENE-6315: Simplify the private iterator Lucene uses internally
+  when resolving deleted terms to matched docids. (Robert Muir, Adrien
+  Grand, Mike McCandless)
+
 ======================= Lucene 5.1.0 =======================
 
 New Features

Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/BufferedUpdatesStream.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/BufferedUpdatesStream.java?rev=1672713&r1=1672712&r2=1672713&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/BufferedUpdatesStream.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/BufferedUpdatesStream.java Fri Apr 10 17:53:17 2015
@@ -32,7 +32,6 @@ import org.apache.lucene.search.DocIdSet
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.QueryWrapperFilter;
-import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
 import org.apache.lucene.util.Accountable;
 import org.apache.lucene.util.BytesRef;
@@ -334,12 +333,6 @@ class BufferedUpdatesStream implements A
     }
 
     if (infoStream.isEnabled("BD")) {
-      Directory dir;
-      if (segmentInfos.size() > 0) {
-        dir = segmentInfos.info(0).info.dir;
-      } else {
-        dir = null;
-      }
       infoStream.message("BD", "prune sis=" + segmentInfos + " minGen=" + minGen + " packetCount=" + updates.size());
     }
     final int limit = updates.size();
@@ -495,18 +488,13 @@ class BufferedUpdatesStream implements A
     String field = null;
     SegmentQueue queue = null;
 
-    while (true) {
-
-      boolean newField;
+    BytesRef term;
 
-      newField = iter.next();
+    while ((term = iter.next()) != null) {
 
-      if (newField) {
+      if (iter.field() != field) {
+        // field changed
         field = iter.field();
-        if (field == null) {
-          // No more terms:
-          break;
-        }
 
         queue = new SegmentQueue(numReaders);
 
@@ -527,9 +515,8 @@ class BufferedUpdatesStream implements A
         assert checkDeleteTerm(null);
       }
 
-      // Get next term to delete
-      BytesRef term = iter.term();
       assert checkDeleteTerm(term);
+
       delTermVisitedCount++;
 
       long delGen = iter.delGen();

Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/FieldTermIterator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/FieldTermIterator.java?rev=1672713&r1=1672712&r2=1672713&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/FieldTermIterator.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/FieldTermIterator.java Fri Apr 10 17:53:17 2015
@@ -17,24 +17,24 @@ package org.apache.lucene.index;
  * limitations under the License.
  */
 
-import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefIterator;
 
 // TODO: maybe TermsFilter could use this?
 
-/** Iterates over terms in multiple fields, notifying the caller when a new field is started. */
-interface FieldTermIterator {
-  /** Advances to the next term, returning true if it's in a new field or there are no more terms.  Call {@link #field} to see which
-   *  field; if that returns null then the iteration ended. */
-  boolean next();
-
-  /** Returns current field, or null if the iteration ended. */
-  String field();
-
-  /** Returns current term. */
-  BytesRef term();
+/** Iterates over terms in across multiple fields.  The caller must
+ *  check {@link #field} after each {@link #next} to see if the field
+ *  changed, but {@code ==} can be used since the iterator
+ *  implementation ensures it will use the same String instance for
+ *  a given field. */
+
+abstract class FieldTermIterator implements BytesRefIterator {
+  /** Returns current field.  This method should not be called
+   *  after iteration is done.  Note that you may use == to
+   *  detect a change in field. */
+  abstract String field();
 
   /** Del gen of the current term. */
   // TODO: this is really per-iterator not per term, but when we use MergedPrefixCodedTermsIterator we need to know which iterator we are on
-  long delGen();
+  abstract long delGen();
 }
 

Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/MergedPrefixCodedTermsIterator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/MergedPrefixCodedTermsIterator.java?rev=1672713&r1=1672712&r2=1672713&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/MergedPrefixCodedTermsIterator.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/MergedPrefixCodedTermsIterator.java Fri Apr 10 17:53:17 2015
@@ -24,7 +24,7 @@ import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.PriorityQueue;
 
 /** Merges multiple {@link FieldTermIterator}s */
-class MergedPrefixCodedTermsIterator implements FieldTermIterator {
+class MergedPrefixCodedTermsIterator extends FieldTermIterator {
 
   private static class TermMergeQueue extends PriorityQueue<TermIterator> {
     TermMergeQueue(int size) {
@@ -74,54 +74,52 @@ class MergedPrefixCodedTermsIterator imp
   String field;
 
   @Override
-  public boolean next() {
+  public BytesRef next() {
     if (termQueue.size() == 0) {
-      // Current field is done:
+      // No more terms in current field:
       if (fieldQueue.size() == 0) {
         // No more fields:
         field = null;
-        return true;
+        return null;
       }
 
       // Transfer all iterators on the next field into the term queue:
       TermIterator top = fieldQueue.pop();
       termQueue.add(top);
-      assert top.field() != null;
+      field = top.field;
+      assert field != null;
 
       while (fieldQueue.size() != 0 && fieldQueue.top().field.equals(top.field)) {
-        termQueue.add(fieldQueue.pop());
+        TermIterator iter = fieldQueue.pop();
+        assert iter.field.equals(field);
+        // TODO: a little bit evil; we do this so we can == on field down below:
+        iter.field = field;
+        termQueue.add(iter);
       }
 
-      field = top.field;
-      return true;
+      return termQueue.top().bytes;
     } else {
       TermIterator top = termQueue.top();
-      if (top.next()) {
-        // New field
+      if (top.next() == null) {
+        termQueue.pop();
+      } else if (top.field() != field) {
+        // Field changed
         termQueue.pop();
-        if (top.field() != null) {
-          fieldQueue.add(top);
-        }
+        fieldQueue.add(top);
       } else {
         termQueue.updateTop();
       }
-
-      if (termQueue.size() != 0) {
-        // Still terms left in this field
-        return false;
-      } else {
+      if (termQueue.size() == 0) {
         // Recurse (just once) to go to next field:                                                                                                                                        
         return next();
+      } else {
+        // Still terms left in this field
+        return termQueue.top().bytes;
       }
     }
   }
 
   @Override
-  public BytesRef term() {
-    return termQueue.top().bytes;
-  }
-
-  @Override
   public String field() {
     return field;
   }

Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/PrefixCodedTerms.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/PrefixCodedTerms.java?rev=1672713&r1=1672712&r2=1672713&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/PrefixCodedTerms.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/PrefixCodedTerms.java Fri Apr 10 17:53:17 2015
@@ -111,7 +111,7 @@ class PrefixCodedTerms implements Accoun
     }
   }
 
-  public static class TermIterator implements FieldTermIterator {
+  public static class TermIterator extends FieldTermIterator {
     final IndexInput input;
     final BytesRefBuilder builder = new BytesRefBuilder();
     final BytesRef bytes = builder.get();
@@ -130,7 +130,7 @@ class PrefixCodedTerms implements Accoun
     }
 
     @Override
-    public boolean next() {
+    public BytesRef next() {
       if (input.getFilePointer() < end) {
         try {
           int code = input.readVInt();
@@ -141,13 +141,13 @@ class PrefixCodedTerms implements Accoun
           int prefix = code >>> 1;
           int suffix = input.readVInt();
           readTermBytes(prefix, suffix);
-          return newField;
+          return bytes;
         } catch (IOException e) {
           throw new RuntimeException(e);
         }
       } else {
         field = null;
-        return true;
+        return null;
       }
     }
 
@@ -159,11 +159,6 @@ class PrefixCodedTerms implements Accoun
     }
 
     @Override
-    public BytesRef term() {
-      return bytes;
-    }
-
-    @Override
     public String field() {
       return field;
     }

Modified: lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/index/TestDocumentsWriterDeleteQueue.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/index/TestDocumentsWriterDeleteQueue.java?rev=1672713&r1=1672712&r2=1672713&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/index/TestDocumentsWriterDeleteQueue.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/index/TestDocumentsWriterDeleteQueue.java Fri Apr 10 17:53:17 2015
@@ -79,17 +79,9 @@ public class TestDocumentsWriterDeleteQu
     HashSet<Term> frozenSet = new HashSet<>();
     BytesRefBuilder bytesRef = new BytesRefBuilder();
     TermIterator iter = queue.freezeGlobalBuffer(null).termIterator();
-    String field = null;
-    while (true) {
-      boolean newField = iter.next();
-      if (newField) {
-        field = iter.field;
-        if (field == null) {
-          break;
-        }
-      }
+    while (iter.next() != null) {
       bytesRef.copyBytes(iter.bytes);
-      frozenSet.add(new Term(field, bytesRef.toBytesRef()));
+      frozenSet.add(new Term(iter.field(), bytesRef.toBytesRef()));
     }
     assertEquals(uniqueValues, frozenSet);
     assertEquals("num deletes must be 0 after freeze", 0, queue
@@ -110,16 +102,12 @@ public class TestDocumentsWriterDeleteQu
     queue.clear();
     assertFalse(queue.anyChanges());
     final int size = 200 + random().nextInt(500) * RANDOM_MULTIPLIER;
-    int termsSinceFreeze = 0;
-    int queriesSinceFreeze = 0;
     for (int i = 0; i < size; i++) {
       Term term = new Term("id", "" + i);
       if (random().nextInt(10) == 0) {
         queue.addDelete(new TermQuery(term));
-        queriesSinceFreeze++;
       } else {
         queue.addDelete(term);
-        termsSinceFreeze++;
       }
       assertTrue(queue.anyChanges());
       if (random().nextInt(10) == 0) {
@@ -218,17 +206,9 @@ public class TestDocumentsWriterDeleteQu
     BytesRefBuilder builder = new BytesRefBuilder();
 
     TermIterator iter = queue.freezeGlobalBuffer(null).termIterator();
-    String field = null;
-    while (true) {
-      boolean newField = iter.next();
-      if (newField) {
-        field = iter.field;
-        if (field == null) {
-          break;
-        }
-      }
+    while (iter.next() != null) {
       builder.copyBytes(iter.bytes);
-      frozenSet.add(new Term(field, builder.toBytesRef()));
+      frozenSet.add(new Term(iter.field(), builder.toBytesRef()));
     }
 
     assertEquals("num deletes must be 0 after freeze", 0, queue

Modified: lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/index/TestPrefixCodedTerms.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/index/TestPrefixCodedTerms.java?rev=1672713&r1=1672712&r2=1672713&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/index/TestPrefixCodedTerms.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/index/TestPrefixCodedTerms.java Fri Apr 10 17:53:17 2015
@@ -33,8 +33,7 @@ public class TestPrefixCodedTerms extend
     PrefixCodedTerms.Builder b = new PrefixCodedTerms.Builder();
     PrefixCodedTerms pb = b.finish();
     TermIterator iter = pb.iterator();
-    assertTrue(iter.next());
-    assertNull(iter.field);
+    assertNull(iter.next());
   }
   
   public void testOne() {
@@ -43,11 +42,10 @@ public class TestPrefixCodedTerms extend
     b.add(term);
     PrefixCodedTerms pb = b.finish();
     TermIterator iter = pb.iterator();
-    assertTrue(iter.next());
-    assertEquals("foo", iter.field);
+    assertNotNull(iter.next());
+    assertEquals("foo", iter.field());
     assertEquals("bogus", iter.bytes.utf8ToString());
-    assertTrue(iter.next());
-    assertNull(iter.field);
+    assertNull(iter.next());
   }
   
   public void testRandom() {
@@ -66,19 +64,10 @@ public class TestPrefixCodedTerms extend
     
     TermIterator iter = pb.iterator();
     Iterator<Term> expected = terms.iterator();
-    String field = "";
     //System.out.println("TEST: now iter");
-    while (true) {
-      boolean newField = iter.next();
-      //System.out.println("  newField=" + newField);
-      if (newField) {
-        field = iter.field;
-        if (field == null) {
-          break;
-        }
-      }
+    while (iter.next() != null) {
       assertTrue(expected.hasNext());
-      assertEquals(expected.next(), new Term(field, iter.bytes));
+      assertEquals(expected.next(), new Term(iter.field(), iter.bytes));
     }
 
     assertFalse(expected.hasNext());
@@ -97,13 +86,14 @@ public class TestPrefixCodedTerms extend
     PrefixCodedTerms pb2 = b2.finish();
 
     MergedPrefixCodedTermsIterator merged = new MergedPrefixCodedTermsIterator(Arrays.asList(new PrefixCodedTerms[] {pb1, pb2}));
-    assertTrue(merged.next());
+    BytesRef term = merged.next();
+    assertNotNull(term);
     assertEquals("foo", merged.field());
-    assertEquals("a", merged.term().utf8ToString());
-    assertFalse(merged.next());
-    assertEquals("b", merged.term().utf8ToString());
-    assertTrue(merged.next());
-    assertNull(merged.field());
+    assertEquals("a", term.utf8ToString());
+    term = merged.next();
+    assertNotNull(term);
+    assertEquals("b", term.utf8ToString());
+    assertNull(merged.next());
   }
 
   @SuppressWarnings({"unchecked","rawtypes"})
@@ -137,25 +127,20 @@ public class TestPrefixCodedTerms extend
     String field = "";
 
     BytesRef lastTerm = null;
-
-    while (true) {
-      if (actual.next()) {
+    BytesRef term;
+    while ((term = actual.next()) != null) {
+      if (field != actual.field()) {
         field = actual.field();
-        if (field == null) {
-          break;
-        }
         lastTerm = null;
-        //System.out.println("\nTEST: new field: " + field);
       }
-      if (lastTerm != null && lastTerm.equals(actual.term())) {
+      if (lastTerm != null && lastTerm.equals(term)) {
         continue;
       }
-      //System.out.println("TEST: iter: field=" + field + " term=" + actual.term());
-      lastTerm = BytesRef.deepCopyOf(actual.term());
+      lastTerm = BytesRef.deepCopyOf(term);
       assertTrue(expected.hasNext());
 
       Term expectedTerm = expected.next();
-      assertEquals(expectedTerm, new Term(field, actual.term()));
+      assertEquals(expectedTerm, new Term(field, term));
     }
 
     assertFalse(expected.hasNext());