You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by mi...@apache.org on 2009/11/20 19:29:53 UTC

svn commit: r882671 - in /lucene/java/branches/flex_1458/src: java/org/apache/lucene/index/ java/org/apache/lucene/index/codecs/standard/ test/org/apache/lucene/index/

Author: mikemccand
Date: Fri Nov 20 18:29:53 2009
New Revision: 882671

URL: http://svn.apache.org/viewvc?rev=882671&view=rev
Log:
LUCENE-1458 (on flex branch): fix deletes resolution (in IW) to use TermsEnum sequentially

Modified:
    lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/DocumentsWriter.java
    lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/IndexReader.java
    lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermsEnum.java
    lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java
    lucene/java/branches/flex_1458/src/test/org/apache/lucene/index/TestTransactions.java

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/DocumentsWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/DocumentsWriter.java?rev=882671&r1=882670&r2=882671&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/DocumentsWriter.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/DocumentsWriter.java Fri Nov 20 18:29:53 2009
@@ -977,19 +977,45 @@
 
     // Delete by term
     try {
+      Fields fields = reader.fields();
+      TermsEnum termsEnum = null;
+
+      String currentField = null;
+      TermRef termRef = new TermRef();
       for (Entry<Term, BufferedDeletes.Num> entry: deletesFlushed.terms.entrySet()) {
         Term term = entry.getKey();
 
-        DocsEnum docs = reader.termDocsEnum(reader.getDeletedDocs(), term.field, new TermRef(term.text));
-        if (docs != null) {
-          int limit = entry.getValue().getNum();
-          while (true) {
-            final int docID = docs.next();
-            if (docID == DocsEnum.NO_MORE_DOCS || docIDStart+docID >= limit) {
-              break;
+        // Since we visit the terms in sorted order, we
+        // gain performance by re-using the same TermsEnum
+        // and seeking only forwards
+        if (term.field() != currentField) {
+          currentField = term.field();
+          Terms terms = fields.terms(currentField);
+          if (terms != null) {
+            termsEnum = terms.iterator();
+          } else {
+            termsEnum = null;
+          }
+        }
+
+        if (termsEnum == null) {
+          continue;
+        }
+
+        termRef.copy(term.text());
+        if (termsEnum.seek(termRef) == TermsEnum.SeekStatus.FOUND) {
+          DocsEnum docs = termsEnum.docs(reader.getDeletedDocs());
+
+          if (docs != null) {
+            int limit = entry.getValue().getNum();
+            while (true) {
+              final int docID = docs.next();
+              if (docID == DocsEnum.NO_MORE_DOCS || docIDStart+docID >= limit) {
+                break;
+              }
+              reader.deleteDocument(docID);
+              any = true;
             }
-            reader.deleteDocument(docID);
-            any = true;
           }
         }
       }

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/IndexReader.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/IndexReader.java?rev=882671&r1=882670&r2=882671&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/IndexReader.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/IndexReader.java Fri Nov 20 18:29:53 2009
@@ -927,6 +927,7 @@
   }
   private static final NullDocsEnum nullDocsEnum = new NullDocsEnum();
 
+  // nocommit -- tap into per-thread cache, here?
   // nocommit -- should we return null or NullDocsEnum?
   /** Returns DocsEnum for the specified field & term. */
   public DocsEnum termDocsEnum(Bits skipDocs, String field, TermRef term) throws IOException {

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermsEnum.java?rev=882671&r1=882670&r2=882671&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermsEnum.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermsEnum.java Fri Nov 20 18:29:53 2009
@@ -74,6 +74,7 @@
    *  {@link SeekStatus#END}.*/
   public abstract int docFreq();
 
+  // nocommit -- clarify if this may return null
   /** Get {@link DocsEnum} for the current term.  The
    *  returned {@link DocsEnum} may share state with this
    *  TermsEnum instance, so you should not call this

Modified: lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java?rev=882671&r1=882670&r2=882671&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java (original)
+++ lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java Fri Nov 20 18:29:53 2009
@@ -231,6 +231,24 @@
       }
     }
 
+    // nocommit -- figure out how to do this one: we want to
+    // reuse the thread-private TermsEnum, but get a cloned
+    // DocsEnum, somehow.  This way, if code is using the
+    // API sequentially, we match the performance of current
+    // trunk (though, really, such code ought to get its
+    // own TermsEnum and use its seek...)
+    /*
+    @Override
+    public DocsEnum docs(Bits skipDocs, TermRef text) throws IOException {
+      ThreadResources resources = getThreadResources();
+      if (resources.termsEnum.seek(text) == TermsEnum.SeekStatus.FOUND) {
+        return resources.termsEnum.docs(skipDocs);
+      } else {
+        return null;
+      }
+    }
+    */
+    
     public void close() {
       threadResources.close();
     }
@@ -318,6 +336,10 @@
           return SeekStatus.FOUND;
         }
 
+        // nocommit -- carry over logic from TermInfosReader,
+        // here, that avoids the binary search if the seek
+        // is within the current index block
+
         // Find latest index term that's <= our text:
         indexReader.getIndexOffset(term, indexResult);
 

Modified: lucene/java/branches/flex_1458/src/test/org/apache/lucene/index/TestTransactions.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/test/org/apache/lucene/index/TestTransactions.java?rev=882671&r1=882670&r2=882671&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/src/test/org/apache/lucene/index/TestTransactions.java (original)
+++ lucene/java/branches/flex_1458/src/test/org/apache/lucene/index/TestTransactions.java Fri Nov 20 18:29:53 2009
@@ -217,6 +217,6 @@
       threads[i].join();
 
     for(int i=0;i<numThread;i++)
-      assertTrue(!((TimedThread) threads[i]).failed);
+      assertFalse(((TimedThread) threads[i]).failed);
   }
 }