You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2012/08/03 01:05:29 UTC

svn commit: r1368749 - in /lucene/dev/branches/branch_4x: ./ lucene/ lucene/core/ lucene/core/src/java/org/apache/lucene/index/ lucene/core/src/java/org/apache/lucene/search/ lucene/core/src/test/org/apache/lucene/index/

Author: mikemccand
Date: Thu Aug  2 23:05:29 2012
New Revision: 1368749

URL: http://svn.apache.org/viewvc?rev=1368749&view=rev
Log:
LUCENE-4203: add IndexWriter.tryDeleteDocument

Modified:
    lucene/dev/branches/branch_4x/   (props changed)
    lucene/dev/branches/branch_4x/lucene/   (props changed)
    lucene/dev/branches/branch_4x/lucene/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/branch_4x/lucene/core/   (props changed)
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/AtomicReaderContext.java
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/IndexReaderContext.java
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/NRTManager.java
    lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestRollingUpdates.java

Modified: lucene/dev/branches/branch_4x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/CHANGES.txt?rev=1368749&r1=1368748&r2=1368749&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_4x/lucene/CHANGES.txt Thu Aug  2 23:05:29 2012
@@ -42,6 +42,11 @@ New features
   implementations to optimize the enum implementation.  (Robert Muir,
   Mike McCandless)
 
+* LUCENE-4203: Add IndexWriter.tryDeleteDocument(AtomicReader reader,
+  int docID), to attempt deletion by docID as long as the provided
+  reader is an NRT reader, and the segment has not yet been merged
+  away (Mike McCandless).
+
 API Changes
 
 * LUCENE-4138: update of morfologik (Polish morphological analyzer) to 1.5.3.

Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/AtomicReaderContext.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/AtomicReaderContext.java?rev=1368749&r1=1368748&r2=1368749&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/AtomicReaderContext.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/AtomicReaderContext.java Thu Aug  2 23:05:29 2012
@@ -1,8 +1,5 @@
 package org.apache.lucene.index;
 
-import java.util.Collections;
-import java.util.List;
-
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -20,6 +17,9 @@ import java.util.List;
  * limitations under the License.
  */
 
+import java.util.Collections;
+import java.util.List;
+
 /**
  * {@link IndexReaderContext} for {@link AtomicReader} instances
  * @lucene.experimental
@@ -51,8 +51,9 @@ public final class AtomicReaderContext e
   
   @Override
   public List<AtomicReaderContext> leaves() {
-    if (!isTopLevel)
+    if (!isTopLevel) {
       throw new UnsupportedOperationException("This is not a top-level context.");
+    }
     assert leaves != null;
     return leaves;
   }

Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/IndexReaderContext.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/IndexReaderContext.java?rev=1368749&r1=1368748&r2=1368749&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/IndexReaderContext.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/IndexReaderContext.java Thu Aug  2 23:05:29 2012
@@ -1,7 +1,5 @@
 package org.apache.lucene.index;
 
-import java.util.List;
-
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -19,6 +17,8 @@ import java.util.List;
  * limitations under the License.
  */
 
+import java.util.List;
+
 /**
  * A struct like class that represents a hierarchical relationship between
  * {@link IndexReader} instances. 

Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java?rev=1368749&r1=1368748&r2=1368749&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java Thu Aug  2 23:05:29 2012
@@ -1243,6 +1243,78 @@ public class IndexWriter implements Clos
     }
   }
 
+  /** Expert: attempts to delete by document ID, as long as
+   *  the provided reader is a near-real-time reader (from {@link
+   *  DirectoryReader#open(IndexWriter,boolean)}).  If the
+   *  provided reader is an NRT reader obtained from this
+   *  writer, and its segment has not been merged away, then
+   *  the delete succeeds and this method returns true; else, it
+   *  returns false the caller must then separately delete by
+   *  Term or Query.
+   *
+   *  <b>NOTE</b>: this method can only delete documents
+   *  visible to the currently open NRT reader.  If you need
+   *  to delete documents indexed after opening the NRT
+   *  reader you must use the other deleteDocument methods
+   *  (e.g., {@link #deleteDocuments(Term)}). */
+  public synchronized boolean tryDeleteDocument(IndexReader readerIn, int docID) throws IOException {
+
+    final AtomicReader reader;
+    if (readerIn instanceof AtomicReader) {
+      // Reader is already atomic: use the incoming docID:
+      reader = (AtomicReader) readerIn;
+    } else {
+      // Composite reader: lookup sub-reader and re-base docID:
+      List<AtomicReaderContext> leaves = readerIn.getTopReaderContext().leaves();
+      int subIndex = ReaderUtil.subIndex(docID, leaves);
+      reader = leaves.get(subIndex).reader();
+      docID -= leaves.get(subIndex).docBase;
+      assert docID >= 0;
+      assert docID < reader.maxDoc();
+    }
+
+    if (!(reader instanceof SegmentReader)) {
+      throw new IllegalArgumentException("the reader must be a SegmentReader or composite reader containing only SegmentReaders");
+    }
+      
+    final SegmentInfoPerCommit info = ((SegmentReader) reader).getSegmentInfo();
+
+    // TODO: this is a slow linear search, but, number of
+    // segments should be contained unless something is
+    // seriously wrong w/ the index, so it should be a minor
+    // cost:
+
+    if (segmentInfos.indexOf(info) != -1) {
+      ReadersAndLiveDocs rld = readerPool.get(info, false);
+      if (rld != null) {
+        synchronized(bufferedDeletesStream) {
+          rld.initWritableLiveDocs();
+          if (rld.delete(docID)) {
+            final int fullDelCount = rld.info.getDelCount() + rld.getPendingDeleteCount();
+            if (fullDelCount == rld.info.info.getDocCount()) {
+              // If a merge has already registered for this
+              // segment, we leave it in the readerPool; the
+              // merge will skip merging it and will then drop
+              // it once it's done:
+              if (!mergingSegments.contains(rld.info)) {
+                segmentInfos.remove(rld.info);
+                readerPool.drop(rld.info);
+                checkpoint();
+              }
+            }
+          }
+          //System.out.println("  yes " + info.info.name + " " + docID);
+          return true;
+        }
+      } else {
+        //System.out.println("  no rld " + info.info.name + " " + docID);
+      }
+    } else {
+      //System.out.println("  no seg " + info.info.name + " " + docID);
+    }
+    return false;
+  }
+
   /**
    * Deletes the document(s) containing any of the
    * terms. All given deletes are applied and flushed atomically

Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/NRTManager.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/NRTManager.java?rev=1368749&r1=1368748&r2=1368749&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/NRTManager.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/search/NRTManager.java Thu Aug  2 23:05:29 2012
@@ -27,6 +27,7 @@ import java.util.concurrent.locks.Reentr
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.SegmentInfoPerCommit;
 import org.apache.lucene.index.IndexReader; // javadocs
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexableField;
@@ -254,6 +255,14 @@ public class NRTManager extends Referenc
     long getAndIncrementGeneration() {
       return indexingGen.getAndIncrement();
     }
+
+    public long tryDeleteDocument(IndexReader reader, int docID) throws IOException {
+      if (writer.tryDeleteDocument(reader, docID)) {
+        return indexingGen.get();
+      } else {
+        return -1;
+      }
+    }
   }
 
   /**

Modified: lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestRollingUpdates.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestRollingUpdates.java?rev=1368749&r1=1368748&r2=1368749&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestRollingUpdates.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/index/TestRollingUpdates.java Thu Aug  2 23:05:29 2012
@@ -23,6 +23,9 @@ import org.apache.lucene.analysis.MockAn
 import org.apache.lucene.codecs.Codec;
 import org.apache.lucene.codecs.memory.MemoryPostingsFormat;
 import org.apache.lucene.document.*;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.store.*;
 import org.apache.lucene.util.*;
 import org.junit.Test;
@@ -48,10 +51,13 @@ public class TestRollingUpdates extends 
     final int SIZE = atLeast(20);
     int id = 0;
     IndexReader r = null;
+    IndexSearcher s = null;
     final int numUpdates = (int) (SIZE * (2+(TEST_NIGHTLY ? 200*random().nextDouble() : 5*random().nextDouble())));
     if (VERBOSE) {
       System.out.println("TEST: numUpdates=" + numUpdates);
     }
+    int updateCount = 0;
+    // TODO: sometimes update ids not in order...
     for(int docIter=0;docIter<numUpdates;docIter++) {
       final Document doc = docs.nextDoc();
       final String myID = ""+id;
@@ -60,16 +66,59 @@ public class TestRollingUpdates extends 
       } else {
         id++;
       }
+      if (VERBOSE) {
+        System.out.println("  docIter=" + docIter + " id=" + id);
+      }
       ((Field) doc.getField("docid")).setStringValue(myID);
-      w.updateDocument(new Term("docid", myID), doc);
+
+      Term idTerm = new Term("docid", myID);
+
+      final boolean doUpdate;
+      if (s != null && updateCount < SIZE) {
+        TopDocs hits = s.search(new TermQuery(idTerm), 1);
+        assertEquals(1, hits.totalHits);
+        doUpdate = !w.tryDeleteDocument(r, hits.scoreDocs[0].doc);
+        if (VERBOSE) {
+          if (doUpdate) {
+            System.out.println("  tryDeleteDocument failed");
+          } else {
+            System.out.println("  tryDeleteDocument succeeded");
+          }
+        }
+      } else {
+        doUpdate = true;
+        if (VERBOSE) {
+          System.out.println("  no searcher: doUpdate=true");
+        }
+      }
+
+      updateCount++;
+
+      if (doUpdate) {
+        w.updateDocument(idTerm, doc);
+      } else {
+        w.addDocument(doc);
+      }
 
       if (docIter >= SIZE && random().nextInt(50) == 17) {
         if (r != null) {
           r.close();
         }
+
         final boolean applyDeletions = random().nextBoolean();
+
+        if (VERBOSE) {
+          System.out.println("TEST: reopen applyDeletions=" + applyDeletions);
+        }
+
         r = w.getReader(applyDeletions);
+        if (applyDeletions) {
+          s = new IndexSearcher(r);
+        } else {
+          s = null;
+        }
         assertTrue("applyDeletions=" + applyDeletions + " r.numDocs()=" + r.numDocs() + " vs SIZE=" + SIZE, !applyDeletions || r.numDocs() == SIZE);
+        updateCount = 0;
       }
     }