You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by mi...@apache.org on 2009/11/20 19:29:53 UTC
svn commit: r882671 - in /lucene/java/branches/flex_1458/src:
java/org/apache/lucene/index/ java/org/apache/lucene/index/codecs/standard/
test/org/apache/lucene/index/
Author: mikemccand
Date: Fri Nov 20 18:29:53 2009
New Revision: 882671
URL: http://svn.apache.org/viewvc?rev=882671&view=rev
Log:
LUCENE-1458 (on flex branch): fix resolution of buffered deletes (in IndexWriter) to reuse a single TermsEnum and seek it sequentially
Modified:
lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/DocumentsWriter.java
lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/IndexReader.java
lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermsEnum.java
lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java
lucene/java/branches/flex_1458/src/test/org/apache/lucene/index/TestTransactions.java
Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/DocumentsWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/DocumentsWriter.java?rev=882671&r1=882670&r2=882671&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/DocumentsWriter.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/DocumentsWriter.java Fri Nov 20 18:29:53 2009
@@ -977,19 +977,45 @@
// Delete by term
try {
+ Fields fields = reader.fields();
+ TermsEnum termsEnum = null;
+
+ String currentField = null;
+ TermRef termRef = new TermRef();
for (Entry<Term, BufferedDeletes.Num> entry: deletesFlushed.terms.entrySet()) {
Term term = entry.getKey();
- DocsEnum docs = reader.termDocsEnum(reader.getDeletedDocs(), term.field, new TermRef(term.text));
- if (docs != null) {
- int limit = entry.getValue().getNum();
- while (true) {
- final int docID = docs.next();
- if (docID == DocsEnum.NO_MORE_DOCS || docIDStart+docID >= limit) {
- break;
+ // Since we visit terms in sorted order, we gain performance
+ // by reusing the same TermsEnum and only ever seeking
+ // forwards
+ if (term.field() != currentField) {
+ currentField = term.field();
+ Terms terms = fields.terms(currentField);
+ if (terms != null) {
+ termsEnum = terms.iterator();
+ } else {
+ termsEnum = null;
+ }
+ }
+
+ if (termsEnum == null) {
+ continue;
+ }
+
+ termRef.copy(term.text());
+ if (termsEnum.seek(termRef) == TermsEnum.SeekStatus.FOUND) {
+ DocsEnum docs = termsEnum.docs(reader.getDeletedDocs());
+
+ if (docs != null) {
+ int limit = entry.getValue().getNum();
+ while (true) {
+ final int docID = docs.next();
+ if (docID == DocsEnum.NO_MORE_DOCS || docIDStart+docID >= limit) {
+ break;
+ }
+ reader.deleteDocument(docID);
+ any = true;
}
- reader.deleteDocument(docID);
- any = true;
}
}
}
Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/IndexReader.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/IndexReader.java?rev=882671&r1=882670&r2=882671&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/IndexReader.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/IndexReader.java Fri Nov 20 18:29:53 2009
@@ -927,6 +927,7 @@
}
private static final NullDocsEnum nullDocsEnum = new NullDocsEnum();
+ // nocommit -- tap into per-thread cache, here?
// nocommit -- should we return null or NullDocsEnum?
/** Returns DocsEnum for the specified field & term. */
public DocsEnum termDocsEnum(Bits skipDocs, String field, TermRef term) throws IOException {
Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermsEnum.java?rev=882671&r1=882670&r2=882671&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermsEnum.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermsEnum.java Fri Nov 20 18:29:53 2009
@@ -74,6 +74,7 @@
* {@link SeekStatus#END}.*/
public abstract int docFreq();
+ // nocommit -- clarify whether this may return null
/** Get {@link DocsEnum} for the current term. The
* returned {@link DocsEnum} may share state with this
* TermsEnum instance, so you should not call this
Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java?rev=882671&r1=882670&r2=882671&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java Fri Nov 20 18:29:53 2009
@@ -231,6 +231,24 @@
}
}
+ // nocommit -- figure out how to do this one: we want to
+ // reuse the thread-private TermsEnum but somehow get a
+ // cloned DocsEnum. This way, if code is using the
+ // API sequentially, we match the performance of current
+ // trunk (though, really, such code ought to get its
+ // own TermsEnum and use its seek...)
+ /*
+ @Override
+ public DocsEnum docs(Bits skipDocs, TermRef text) throws IOException {
+ ThreadResources resources = getThreadResources();
+ if (resources.termsEnum.seek(text) == TermsEnum.SeekStatus.FOUND) {
+ return resources.termsEnum.docs(skipDocs);
+ } else {
+ return null;
+ }
+ }
+ */
+
public void close() {
threadResources.close();
}
@@ -318,6 +336,10 @@
return SeekStatus.FOUND;
}
+ // nocommit -- carry over the logic from TermInfosReader
+ // here that avoids the binary search if the seek
+ // is within the current index block
+
// Find latest index term that's <= our text:
indexReader.getIndexOffset(term, indexResult);
Modified: lucene/java/branches/flex_1458/src/test/org/apache/lucene/index/TestTransactions.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/test/org/apache/lucene/index/TestTransactions.java?rev=882671&r1=882670&r2=882671&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/test/org/apache/lucene/index/TestTransactions.java (original)
+++ lucene/java/branches/flex_1458/src/test/org/apache/lucene/index/TestTransactions.java Fri Nov 20 18:29:53 2009
@@ -217,6 +217,6 @@
threads[i].join();
for(int i=0;i<numThread;i++)
- assertTrue(!((TimedThread) threads[i]).failed);
+ assertFalse(((TimedThread) threads[i]).failed);
}
}