You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by mi...@apache.org on 2009/11/24 14:48:12 UTC

svn commit: r883694 - in /lucene/java/branches/lucene_2_9: ./ CHANGES.txt src/java/org/apache/lucene/index/BufferedDeletes.java src/java/org/apache/lucene/index/DocumentsWriter.java src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java

Author: mikemccand
Date: Tue Nov 24 13:48:11 2009
New Revision: 883694

URL: http://svn.apache.org/viewvc?rev=883694&view=rev
Log:
LUCENE-2086 (on 2.9 branch): resolve deleted terms in term-sort order for better performance

Modified:
    lucene/java/branches/lucene_2_9/   (props changed)
    lucene/java/branches/lucene_2_9/CHANGES.txt
    lucene/java/branches/lucene_2_9/src/java/org/apache/lucene/index/BufferedDeletes.java
    lucene/java/branches/lucene_2_9/src/java/org/apache/lucene/index/DocumentsWriter.java
    lucene/java/branches/lucene_2_9/src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java   (props changed)

Propchange: lucene/java/branches/lucene_2_9/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Tue Nov 24 13:48:11 2009
@@ -1,2 +1,2 @@
 /lucene/java/branches/lucene_2_4:748824
-/lucene/java/trunk:824125,826029,826385,830871,833095,833297,833886
+/lucene/java/trunk:824125,826029,826385,830871,833095,833297,833886,882672

Modified: lucene/java/branches/lucene_2_9/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_2_9/CHANGES.txt?rev=883694&r1=883693&r2=883694&view=diff
==============================================================================
--- lucene/java/branches/lucene_2_9/CHANGES.txt (original)
+++ lucene/java/branches/lucene_2_9/CHANGES.txt Tue Nov 24 13:48:11 2009
@@ -12,6 +12,11 @@
  * LUCENE-2088: addAttribute() should only accept interfaces that
    extend Attribute. (Shai Erera, Uwe Schindler)
 
+Optimizations
+
+ * LUCENE-2086: When resolving deleted terms, do so in term sort order
+   for better performance (Bogdan Ghidireac via Mike McCandless)
+
 ======================= Release 2.9.1 2009-11-06 =======================
 
 Changes in backwards compatibility policy

Modified: lucene/java/branches/lucene_2_9/src/java/org/apache/lucene/index/BufferedDeletes.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_2_9/src/java/org/apache/lucene/index/BufferedDeletes.java?rev=883694&r1=883693&r2=883694&view=diff
==============================================================================
--- lucene/java/branches/lucene_2_9/src/java/org/apache/lucene/index/BufferedDeletes.java (original)
+++ lucene/java/branches/lucene_2_9/src/java/org/apache/lucene/index/BufferedDeletes.java Tue Nov 24 13:48:11 2009
@@ -18,6 +18,8 @@
  */
 
 import java.util.HashMap;
+import java.util.Map;
+import java.util.TreeMap;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Iterator;
@@ -32,10 +34,20 @@
  *  previously flushed segments. */
 class BufferedDeletes {
   int numTerms;
-  HashMap terms = new HashMap();
-  HashMap queries = new HashMap();
+  Map terms;
+  Map queries = new HashMap();
   List docIDs = new ArrayList();
   long bytesUsed;
+  private final boolean doTermSort;
+
+  public BufferedDeletes(boolean doTermSort) {
+    this.doTermSort = doTermSort;
+    if (doTermSort) {
+      terms = new TreeMap();
+    } else {
+      terms = new HashMap();
+    }
+  }
 
   // Number of documents a delete term applies to.
   final static class Num {
@@ -103,11 +115,15 @@
                           MergePolicy.OneMerge merge,
                           int mergeDocCount) {
 
-    final HashMap newDeleteTerms;
+    final Map newDeleteTerms;
 
     // Remap delete-by-term
     if (terms.size() > 0) {
-      newDeleteTerms = new HashMap();
+      if (doTermSort) {
+        newDeleteTerms = new TreeMap();
+      } else {
+        newDeleteTerms = new HashMap();
+      }
       Iterator iter = terms.entrySet().iterator();
       while(iter.hasNext()) {
         Entry entry = (Entry) iter.next();

Modified: lucene/java/branches/lucene_2_9/src/java/org/apache/lucene/index/DocumentsWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/lucene_2_9/src/java/org/apache/lucene/index/DocumentsWriter.java?rev=883694&r1=883693&r2=883694&view=diff
==============================================================================
--- lucene/java/branches/lucene_2_9/src/java/org/apache/lucene/index/DocumentsWriter.java (original)
+++ lucene/java/branches/lucene_2_9/src/java/org/apache/lucene/index/DocumentsWriter.java Tue Nov 24 13:48:11 2009
@@ -23,6 +23,7 @@
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashMap;
+import java.util.Map;
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
@@ -222,11 +223,11 @@
 
   // Deletes done after the last flush; these are discarded
   // on abort
-  private BufferedDeletes deletesInRAM = new BufferedDeletes();
+  private BufferedDeletes deletesInRAM = new BufferedDeletes(false);
 
   // Deletes done before the last flush; these are still
   // kept on abort
-  private BufferedDeletes deletesFlushed = new BufferedDeletes();
+  private BufferedDeletes deletesFlushed = new BufferedDeletes(true);
 
   // The max number of delete terms that can be buffered before
   // they must be flushed to disk.
@@ -835,7 +836,7 @@
   }
 
   // for testing
-  synchronized HashMap getBufferedDeleteTerms() {
+  synchronized Map getBufferedDeleteTerms() {
     return deletesInRAM.terms;
   }
 
@@ -966,6 +967,18 @@
     return any;
   }
 
+  // used only by assert
+  private Term lastDeleteTerm;
+
+  // used only by assert
+  private boolean checkDeleteTerm(Term term) {
+    if (term != null) {
+      assert lastDeleteTerm == null || term.compareTo(lastDeleteTerm) > 0: "lastTerm=" + lastDeleteTerm + " vs term=" + term;
+    }
+    lastDeleteTerm = term;
+    return true;
+  }
+
   // Apply buffered delete terms, queries and docIDs to the
   // provided reader
   private final synchronized boolean applyDeletes(IndexReader reader, int docIDStart)
@@ -974,6 +987,8 @@
     final int docEnd = docIDStart + reader.maxDoc();
     boolean any = false;
 
+    assert checkDeleteTerm(null);
+
     // Delete by term
     Iterator iter = deletesFlushed.terms.entrySet().iterator();
     TermDocs docs = reader.termDocs();
@@ -981,7 +996,9 @@
       while (iter.hasNext()) {
         Entry entry = (Entry) iter.next();
         Term term = (Term) entry.getKey();
-
+        // LUCENE-2086: we should be iterating a TreeMap,
+        // here, so terms better be in order:
+        assert checkDeleteTerm(term);
         docs.seek(term);
         int limit = ((BufferedDeletes.Num) entry.getValue()).getNum();
         while (docs.next()) {

Propchange: lucene/java/branches/lucene_2_9/src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Tue Nov 24 13:48:11 2009
@@ -1,2 +1,2 @@
 /lucene/java/branches/lucene_2_4/src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java:748824
-/lucene/java/trunk/src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java:818920,824125,826029,826385,830871,833095,833297,833886
+/lucene/java/trunk/src/test/org/apache/lucene/analysis/BaseTokenStreamTestCase.java:818920,824125,826029,826385,830871,833095,833297,833886,882672