You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by mi...@apache.org on 2009/11/24 14:48:12 UTC
svn commit: r883694 - in /lucene/java/branches/lucene_2_9: ./ CHANGES.txt
src/java/org/apache/lucene/index/BufferedDeletes.java
src/java/org/apache/lucene/index/DocumentsWriter.java
src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
Author: mikemccand
Date: Tue Nov 24 13:48:11 2009
New Revision: 883694
URL: http://svn.apache.org/viewvc?rev=883694&view=rev
Log:
LUCENE-2086 (on 2.9 branch): resolve deleted terms in term-sort order for better performance
Modified:
lucene/java/branches/lucene_2_9/ (props changed)
lucene/java/branches/lucene_2_9/CHANGES.txt
lucene/java/branches/lucene_2_9/src/java/org/apache/lucene/index/BufferedDeletes.java
lucene/java/branches/lucene_2_9/src/java/org/apache/lucene/index/DocumentsWriter.java
lucene/java/branches/lucene_2_9/src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java (props changed)
Propchange: lucene/java/branches/lucene_2_9/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Tue Nov 24 13:48:11 2009
@@ -1,2 +1,2 @@
/lucene/java/branches/lucene_2_4:748824
-/lucene/java/trunk:824125,826029,826385,830871,833095,833297,833886
+/lucene/java/trunk:824125,826029,826385,830871,833095,833297,833886,882672
Modified: lucene/java/branches/lucene_2_9/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_2_9/CHANGES.txt?rev=883694&r1=883693&r2=883694&view=diff
==============================================================================
--- lucene/java/branches/lucene_2_9/CHANGES.txt (original)
+++ lucene/java/branches/lucene_2_9/CHANGES.txt Tue Nov 24 13:48:11 2009
@@ -12,6 +12,11 @@
* LUCENE-2088: addAttribute() should only accept interfaces that
extend Attribute. (Shai Erera, Uwe Schindler)
+Optimizations
+
+ * LUCENE-2086: When resolving deleted terms, do so in term sort order
+ for better performance (Bogdan Ghidireac via Mike McCandless)
+
======================= Release 2.9.1 2009-11-06 =======================
Changes in backwards compatibility policy
Modified: lucene/java/branches/lucene_2_9/src/java/org/apache/lucene/index/BufferedDeletes.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_2_9/src/java/org/apache/lucene/index/BufferedDeletes.java?rev=883694&r1=883693&r2=883694&view=diff
==============================================================================
--- lucene/java/branches/lucene_2_9/src/java/org/apache/lucene/index/BufferedDeletes.java (original)
+++ lucene/java/branches/lucene_2_9/src/java/org/apache/lucene/index/BufferedDeletes.java Tue Nov 24 13:48:11 2009
@@ -18,6 +18,8 @@
*/
import java.util.HashMap;
+import java.util.Map;
+import java.util.TreeMap;
import java.util.ArrayList;
import java.util.List;
import java.util.Iterator;
@@ -32,10 +34,20 @@
* previously flushed segments. */
class BufferedDeletes {
int numTerms;
- HashMap terms = new HashMap();
- HashMap queries = new HashMap();
+ Map terms;
+ Map queries = new HashMap();
List docIDs = new ArrayList();
long bytesUsed;
+ private final boolean doTermSort;
+
+ public BufferedDeletes(boolean doTermSort) {
+ this.doTermSort = doTermSort;
+ if (doTermSort) {
+ terms = new TreeMap();
+ } else {
+ terms = new HashMap();
+ }
+ }
// Number of documents a delete term applies to.
final static class Num {
@@ -103,11 +115,15 @@
MergePolicy.OneMerge merge,
int mergeDocCount) {
- final HashMap newDeleteTerms;
+ final Map newDeleteTerms;
// Remap delete-by-term
if (terms.size() > 0) {
- newDeleteTerms = new HashMap();
+ if (doTermSort) {
+ newDeleteTerms = new TreeMap();
+ } else {
+ newDeleteTerms = new HashMap();
+ }
Iterator iter = terms.entrySet().iterator();
while(iter.hasNext()) {
Entry entry = (Entry) iter.next();
Modified: lucene/java/branches/lucene_2_9/src/java/org/apache/lucene/index/DocumentsWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_2_9/src/java/org/apache/lucene/index/DocumentsWriter.java?rev=883694&r1=883693&r2=883694&view=diff
==============================================================================
--- lucene/java/branches/lucene_2_9/src/java/org/apache/lucene/index/DocumentsWriter.java (original)
+++ lucene/java/branches/lucene_2_9/src/java/org/apache/lucene/index/DocumentsWriter.java Tue Nov 24 13:48:11 2009
@@ -23,6 +23,7 @@
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
+import java.util.Map;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
@@ -222,11 +223,11 @@
// Deletes done after the last flush; these are discarded
// on abort
- private BufferedDeletes deletesInRAM = new BufferedDeletes();
+ private BufferedDeletes deletesInRAM = new BufferedDeletes(false);
// Deletes done before the last flush; these are still
// kept on abort
- private BufferedDeletes deletesFlushed = new BufferedDeletes();
+ private BufferedDeletes deletesFlushed = new BufferedDeletes(true);
// The max number of delete terms that can be buffered before
// they must be flushed to disk.
@@ -835,7 +836,7 @@
}
// for testing
- synchronized HashMap getBufferedDeleteTerms() {
+ synchronized Map getBufferedDeleteTerms() {
return deletesInRAM.terms;
}
@@ -966,6 +967,18 @@
return any;
}
+ // used only by assert
+ private Term lastDeleteTerm;
+
+ // used only by assert
+ private boolean checkDeleteTerm(Term term) {
+ if (term != null) {
+ assert lastDeleteTerm == null || term.compareTo(lastDeleteTerm) > 0: "lastTerm=" + lastDeleteTerm + " vs term=" + term;
+ }
+ lastDeleteTerm = term;
+ return true;
+ }
+
// Apply buffered delete terms, queries and docIDs to the
// provided reader
private final synchronized boolean applyDeletes(IndexReader reader, int docIDStart)
@@ -974,6 +987,8 @@
final int docEnd = docIDStart + reader.maxDoc();
boolean any = false;
+ assert checkDeleteTerm(null);
+
// Delete by term
Iterator iter = deletesFlushed.terms.entrySet().iterator();
TermDocs docs = reader.termDocs();
@@ -981,7 +996,9 @@
while (iter.hasNext()) {
Entry entry = (Entry) iter.next();
Term term = (Term) entry.getKey();
-
+ // LUCENE-2086: we should be iterating a TreeMap,
+ // here, so terms better be in order:
+ assert checkDeleteTerm(term);
docs.seek(term);
int limit = ((BufferedDeletes.Num) entry.getValue()).getNum();
while (docs.next()) {
Propchange: lucene/java/branches/lucene_2_9/src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Tue Nov 24 13:48:11 2009
@@ -1,2 +1,2 @@
/lucene/java/branches/lucene_2_4/src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java:748824
-/lucene/java/trunk/src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java:818920,824125,826029,826385,830871,833095,833297,833886
+/lucene/java/trunk/src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java:818920,824125,826029,826385,830871,833095,833297,833886,882672