You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2015/01/05 19:06:23 UTC
svn commit: r1649601 - in /lucene/dev/branches/lucene_solr_4_10: ./ lucene/
lucene/CHANGES.txt lucene/core/
lucene/core/src/java/org/apache/lucene/index/BufferedUpdatesStream.java
Author: mikemccand
Date: Mon Jan 5 18:06:23 2015
New Revision: 1649601
URL: http://svn.apache.org/r1649601
Log:
LUCENE-6161: reuse DocsEnum when resolving deleted terms/queries to doc id
Modified:
lucene/dev/branches/lucene_solr_4_10/ (props changed)
lucene/dev/branches/lucene_solr_4_10/lucene/ (props changed)
lucene/dev/branches/lucene_solr_4_10/lucene/CHANGES.txt
lucene/dev/branches/lucene_solr_4_10/lucene/core/ (props changed)
lucene/dev/branches/lucene_solr_4_10/lucene/core/src/java/org/apache/lucene/index/BufferedUpdatesStream.java
Modified: lucene/dev/branches/lucene_solr_4_10/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_10/lucene/CHANGES.txt?rev=1649601&r1=1649600&r2=1649601&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_10/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/lucene_solr_4_10/lucene/CHANGES.txt Mon Jan 5 18:06:23 2015
@@ -10,6 +10,10 @@ Bug fixes
* LUCENE-6019, LUCENE-6117: Remove -Dtests.assert to make IndexWriter
infoStream sane. (Robert Muir, Mike McCandless)
+* LUCENE-6161: Resolving deletes was failing to reuse DocsEnum, likely
+ causing substantial performance cost for use cases that frequently
+ delete old documents. (Mike McCandless)
+
======================= Lucene 4.10.3 ======================
Bug fixes
Modified: lucene/dev/branches/lucene_solr_4_10/lucene/core/src/java/org/apache/lucene/index/BufferedUpdatesStream.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_10/lucene/core/src/java/org/apache/lucene/index/BufferedUpdatesStream.java?rev=1649601&r1=1649600&r2=1649601&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_10/lucene/core/src/java/org/apache/lucene/index/BufferedUpdatesStream.java (original)
+++ lucene/dev/branches/lucene_solr_4_10/lucene/core/src/java/org/apache/lucene/index/BufferedUpdatesStream.java Mon Jan 5 18:06:23 2015
@@ -393,7 +393,7 @@ class BufferedUpdatesStream implements A
TermsEnum termsEnum = null;
String currentField = null;
- DocsEnum docs = null;
+ DocsEnum docsEnum = null;
assert checkDeleteTerm(null);
@@ -416,36 +416,38 @@ class BufferedUpdatesStream implements A
}
if (termsEnum == null) {
+ // no terms in this field
continue;
}
+
assert checkDeleteTerm(term);
// System.out.println(" term=" + term);
if (termsEnum.seekExact(term.bytes())) {
// we don't need term frequencies for this
- DocsEnum docsEnum = termsEnum.docs(rld.getLiveDocs(), docs, DocsEnum.FLAG_NONE);
+ docsEnum = termsEnum.docs(rld.getLiveDocs(), docsEnum, DocsEnum.FLAG_NONE);
//System.out.println("BDS: got docsEnum=" + docsEnum);
- if (docsEnum != null) {
- while (true) {
- final int docID = docsEnum.nextDoc();
- //System.out.println(Thread.currentThread().getName() + " del term=" + term + " doc=" + docID);
- if (docID == DocIdSetIterator.NO_MORE_DOCS) {
- break;
- }
- if (!any) {
- rld.initWritableLiveDocs();
- any = true;
- }
- // NOTE: there is no limit check on the docID
- // when deleting by Term (unlike by Query)
- // because on flush we apply all Term deletes to
- // each segment. So all Term deleting here is
- // against prior segments:
- if (rld.delete(docID)) {
- delCount++;
- }
+ assert docsEnum != null;
+
+ while (true) {
+ final int docID = docsEnum.nextDoc();
+ //System.out.println(Thread.currentThread().getName() + " del term=" + term + " doc=" + docID);
+ if (docID == DocIdSetIterator.NO_MORE_DOCS) {
+ break;
+ }
+ if (!any) {
+ rld.initWritableLiveDocs();
+ any = true;
+ }
+ // NOTE: there is no limit check on the docID
+ // when deleting by Term (unlike by Query)
+ // because on flush we apply all Term deletes to
+ // each segment. So all Term deleting here is
+ // against prior segments:
+ if (rld.delete(docID)) {
+ delCount++;
}
}
}
@@ -475,7 +477,7 @@ class BufferedUpdatesStream implements A
String currentField = null;
TermsEnum termsEnum = null;
- DocsEnum docs = null;
+ DocsEnum docsEnum = null;
//System.out.println(Thread.currentThread().getName() + " numericDVUpdate reader=" + reader);
for (DocValuesUpdate update : updates) {
@@ -501,19 +503,19 @@ class BufferedUpdatesStream implements A
termsEnum = terms.iterator(termsEnum);
} else {
termsEnum = null;
- continue; // no terms in that field
}
}
if (termsEnum == null) {
+ // no terms in this field
continue;
}
+
// System.out.println(" term=" + term);
if (termsEnum.seekExact(term.bytes())) {
// we don't need term frequencies for this
- DocsEnum docsEnum = termsEnum.docs(rld.getLiveDocs(), docs, DocsEnum.FLAG_NONE);
-
+ docsEnum = termsEnum.docs(rld.getLiveDocs(), docsEnum, DocsEnum.FLAG_NONE);
//System.out.println("BDS: got docsEnum=" + docsEnum);
DocValuesFieldUpdates dvUpdates = dvUpdatesContainer.getUpdates(update.field, update.type);