You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2015/01/05 19:06:23 UTC

svn commit: r1649601 - in /lucene/dev/branches/lucene_solr_4_10: ./ lucene/ lucene/CHANGES.txt lucene/core/ lucene/core/src/java/org/apache/lucene/index/BufferedUpdatesStream.java

Author: mikemccand
Date: Mon Jan  5 18:06:23 2015
New Revision: 1649601

URL: http://svn.apache.org/r1649601
Log:
LUCENE-6161: reuse DocsEnum when resolving deleted terms/queries to doc id

Modified:
    lucene/dev/branches/lucene_solr_4_10/   (props changed)
    lucene/dev/branches/lucene_solr_4_10/lucene/   (props changed)
    lucene/dev/branches/lucene_solr_4_10/lucene/CHANGES.txt
    lucene/dev/branches/lucene_solr_4_10/lucene/core/   (props changed)
    lucene/dev/branches/lucene_solr_4_10/lucene/core/src/java/org/apache/lucene/index/BufferedUpdatesStream.java

Modified: lucene/dev/branches/lucene_solr_4_10/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_10/lucene/CHANGES.txt?rev=1649601&r1=1649600&r2=1649601&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_10/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/lucene_solr_4_10/lucene/CHANGES.txt Mon Jan  5 18:06:23 2015
@@ -10,6 +10,10 @@ Bug fixes
 * LUCENE-6019, LUCENE-6117: Remove -Dtests.assert to make IndexWriter
   infoStream sane.  (Robert Muir, Mike McCandless)
 
+* LUCENE-6161: Resolving deletes was failing to reuse DocsEnum, likely
+  causing substantial performance cost for use cases that frequently
+  delete old documents. (Mike McCandless)
+
 ======================= Lucene 4.10.3 ======================
 
 Bug fixes

Modified: lucene/dev/branches/lucene_solr_4_10/lucene/core/src/java/org/apache/lucene/index/BufferedUpdatesStream.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_10/lucene/core/src/java/org/apache/lucene/index/BufferedUpdatesStream.java?rev=1649601&r1=1649600&r2=1649601&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_10/lucene/core/src/java/org/apache/lucene/index/BufferedUpdatesStream.java (original)
+++ lucene/dev/branches/lucene_solr_4_10/lucene/core/src/java/org/apache/lucene/index/BufferedUpdatesStream.java Mon Jan  5 18:06:23 2015
@@ -393,7 +393,7 @@ class BufferedUpdatesStream implements A
     TermsEnum termsEnum = null;
 
     String currentField = null;
-    DocsEnum docs = null;
+    DocsEnum docsEnum = null;
 
     assert checkDeleteTerm(null);
 
@@ -416,36 +416,38 @@ class BufferedUpdatesStream implements A
       }
 
       if (termsEnum == null) {
+        // no terms in this field
         continue;
       }
+
       assert checkDeleteTerm(term);
 
       // System.out.println("  term=" + term);
 
       if (termsEnum.seekExact(term.bytes())) {
         // we don't need term frequencies for this
-        DocsEnum docsEnum = termsEnum.docs(rld.getLiveDocs(), docs, DocsEnum.FLAG_NONE);
+        docsEnum = termsEnum.docs(rld.getLiveDocs(), docsEnum, DocsEnum.FLAG_NONE);
         //System.out.println("BDS: got docsEnum=" + docsEnum);
 
-        if (docsEnum != null) {
-          while (true) {
-            final int docID = docsEnum.nextDoc();
-            //System.out.println(Thread.currentThread().getName() + " del term=" + term + " doc=" + docID);
-            if (docID == DocIdSetIterator.NO_MORE_DOCS) {
-              break;
-            }   
-            if (!any) {
-              rld.initWritableLiveDocs();
-              any = true;
-            }
-            // NOTE: there is no limit check on the docID
-            // when deleting by Term (unlike by Query)
-            // because on flush we apply all Term deletes to
-            // each segment.  So all Term deleting here is
-            // against prior segments:
-            if (rld.delete(docID)) {
-              delCount++;
-            }
+        assert docsEnum != null;
+
+        while (true) {
+          final int docID = docsEnum.nextDoc();
+          //System.out.println(Thread.currentThread().getName() + " del term=" + term + " doc=" + docID);
+          if (docID == DocIdSetIterator.NO_MORE_DOCS) {
+            break;
+          }   
+          if (!any) {
+            rld.initWritableLiveDocs();
+            any = true;
+          }
+          // NOTE: there is no limit check on the docID
+          // when deleting by Term (unlike by Query)
+          // because on flush we apply all Term deletes to
+          // each segment.  So all Term deleting here is
+          // against prior segments:
+          if (rld.delete(docID)) {
+            delCount++;
           }
         }
       }
@@ -475,7 +477,7 @@ class BufferedUpdatesStream implements A
     
     String currentField = null;
     TermsEnum termsEnum = null;
-    DocsEnum docs = null;
+    DocsEnum docsEnum = null;
     
     //System.out.println(Thread.currentThread().getName() + " numericDVUpdate reader=" + reader);
     for (DocValuesUpdate update : updates) {
@@ -501,19 +503,19 @@ class BufferedUpdatesStream implements A
           termsEnum = terms.iterator(termsEnum);
         } else {
           termsEnum = null;
-          continue; // no terms in that field
         }
       }
 
       if (termsEnum == null) {
+        // no terms in this field
         continue;
       }
+
       // System.out.println("  term=" + term);
 
       if (termsEnum.seekExact(term.bytes())) {
         // we don't need term frequencies for this
-        DocsEnum docsEnum = termsEnum.docs(rld.getLiveDocs(), docs, DocsEnum.FLAG_NONE);
-      
+        docsEnum = termsEnum.docs(rld.getLiveDocs(), docsEnum, DocsEnum.FLAG_NONE);
         //System.out.println("BDS: got docsEnum=" + docsEnum);
 
         DocValuesFieldUpdates dvUpdates = dvUpdatesContainer.getUpdates(update.field, update.type);