You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by mi...@apache.org on 2009/11/20 19:37:43 UTC

svn commit: r882672 - in /lucene/java/trunk: CHANGES.txt src/java/org/apache/lucene/index/BufferedDeletes.java src/java/org/apache/lucene/index/DocumentsWriter.java

Author: mikemccand
Date: Fri Nov 20 18:37:42 2009
New Revision: 882672

URL: http://svn.apache.org/viewvc?rev=882672&view=rev
Log:
LUCENE-2086: resolve deletes-by-term in term-sorted order for better performance

Modified:
    lucene/java/trunk/CHANGES.txt
    lucene/java/trunk/src/java/org/apache/lucene/index/BufferedDeletes.java
    lucene/java/trunk/src/java/org/apache/lucene/index/DocumentsWriter.java

Modified: lucene/java/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/CHANGES.txt?rev=882672&r1=882671&r2=882672&view=diff
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Fri Nov 20 18:37:42 2009
@@ -17,6 +17,9 @@
 
 Optimizations
 
+* LUCENE-2086: When resolving deleted terms, do so in term sort order
+  for better performance (Bogdan Ghidireac via Mike McCandless)
+
 Build
 
 ======================= Release 3.0.0 2009-11-25 =======================

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/BufferedDeletes.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/BufferedDeletes.java?rev=882672&r1=882671&r2=882672&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/BufferedDeletes.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/BufferedDeletes.java Fri Nov 20 18:37:42 2009
@@ -18,6 +18,8 @@
  */
 
 import java.util.HashMap;
+import java.util.Map;
+import java.util.TreeMap;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Map.Entry;
@@ -33,10 +35,20 @@
  *  previously flushed segments. */
 class BufferedDeletes {
   int numTerms;
-  HashMap<Term,Num> terms = new HashMap<Term,Num>();
-  HashMap<Query,Integer> queries = new HashMap<Query,Integer>();
+  Map<Term,Num> terms;
+  Map<Query,Integer> queries = new HashMap<Query,Integer>();
   List<Integer> docIDs = new ArrayList<Integer>();
   long bytesUsed;
+  private final boolean doTermSort;
+
+  public BufferedDeletes(boolean doTermSort) {
+    this.doTermSort = doTermSort;
+    if (doTermSort) {
+      terms = new TreeMap<Term,Num>();
+    } else {
+      terms = new HashMap<Term,Num>();
+    }
+  }
 
   // Number of documents a delete term applies to.
   final static class Num {
@@ -104,11 +116,15 @@
                           MergePolicy.OneMerge merge,
                           int mergeDocCount) {
 
-    final HashMap<Term,Num> newDeleteTerms;
+    final Map<Term,Num> newDeleteTerms;
 
     // Remap delete-by-term
     if (terms.size() > 0) {
-      newDeleteTerms = new HashMap<Term, Num>();
+      if (doTermSort) {
+        newDeleteTerms = new TreeMap<Term,Num>();
+      } else {
+        newDeleteTerms = new HashMap<Term,Num>();
+      }
       for(Entry<Term,Num> entry : terms.entrySet()) {
         Num num = entry.getValue();
         newDeleteTerms.put(entry.getKey(),

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/DocumentsWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/DocumentsWriter.java?rev=882672&r1=882671&r2=882672&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/DocumentsWriter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/DocumentsWriter.java Fri Nov 20 18:37:42 2009
@@ -23,6 +23,7 @@
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashMap;
+import java.util.Map;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Map.Entry;
@@ -219,11 +220,11 @@
 
   // Deletes done after the last flush; these are discarded
   // on abort
-  private BufferedDeletes deletesInRAM = new BufferedDeletes();
+  private BufferedDeletes deletesInRAM = new BufferedDeletes(false);
 
   // Deletes done before the last flush; these are still
   // kept on abort
-  private BufferedDeletes deletesFlushed = new BufferedDeletes();
+  private BufferedDeletes deletesFlushed = new BufferedDeletes(true);
 
   // The max number of delete terms that can be buffered before
   // they must be flushed to disk.
@@ -828,7 +829,7 @@
   }
 
   // for testing
-  synchronized HashMap<Term,BufferedDeletes.Num> getBufferedDeleteTerms() {
+  synchronized Map<Term,BufferedDeletes.Num> getBufferedDeleteTerms() {
     return deletesInRAM.terms;
   }
 
@@ -974,7 +975,6 @@
     try {
       for (Entry<Term, BufferedDeletes.Num> entry: deletesFlushed.terms.entrySet()) {
         Term term = entry.getKey();
-
         docs.seek(term);
         int limit = entry.getValue().getNum();
         while (docs.next()) {